S: Oak Park, Illinois 60302
S: USA
+N: Daniel J. Frasnelli
+E: dfrasnel@alphalinux.org
+W: http://www.alphalinux.org/
+P: 1024/3EF87611 B9 F1 44 50 D3 E8 C2 80 DA E5 55 AA 56 7C 42 DA
+D: DEC Alpha hacker
+D: Miscellaneous bug squisher
+
N: Jim Freeman
E: jfree@sovereign.org
W: http://www.sovereign.org/
VERSION = 2
PATCHLEVEL = 3
-SUBLEVEL = 6
+SUBLEVEL = 7
EXTRAVERSION =
ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/)
reserved++;
else if (PageSwapCache(mem_map+i))
cached++;
- else if (!atomic_read(&mem_map[i].count))
+ else if (!page_count(mem_map+i))
free++;
else
- shared += atomic_read(&mem_map[i].count) - 1;
+ shared += page_count(mem_map+i) - 1;
}
printk("%d pages of RAM\n",total);
printk("%d reserved pages\n",reserved);
reservedpages++;
continue;
}
- atomic_set(&mem_map[MAP_NR(tmp)].count, 1);
+ set_page_count(mem_map+MAP_NR(tmp), 1);
#ifdef CONFIG_BLK_DEV_INITRD
if (!initrd_start || (tmp < initrd_start || tmp >=
initrd_end))
addr = (unsigned long)(&__init_begin);
for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
mem_map[MAP_NR(addr)].flags &= ~(1 << PG_reserved);
- atomic_set(&mem_map[MAP_NR(addr)].count, 1);
+ set_page_count(mem_map+MAP_NR(addr), 1);
free_page(addr);
}
printk ("Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10);
if (PageReserved(mem_map+i))
continue;
val->totalram++;
- if (!atomic_read(&mem_map[i].count))
+ if (!page_count(mem_map+i))
continue;
- val->sharedram += atomic_read(&mem_map[i].count) - 1;
+ val->sharedram += page_count(mem_map+i) - 1;
}
val->totalram <<= PAGE_SHIFT;
val->sharedram <<= PAGE_SHIFT;
/* Verify requested block sizes. */
for (i = 0; i < nr; i++) {
- if (bh[i] && bh[i]->b_size != correct_size) {
+ if (bh[i]->b_size != correct_size) {
printk(KERN_NOTICE "ll_rw_block: device %s: "
"only %d-char blocks implemented (%lu)\n",
kdevname(bh[0]->b_dev),
* Al Longyear <longyear@netcom.com>, Paul Mackerras <Paul.Mackerras@cs.anu.edu.au>
*
* Original release 01/11/99
+ * ==FILEDATE 19990524==
*
* This code is released under the GNU General Public License (GPL)
*
*/
#define HDLC_MAGIC 0x239e
-#define HDLC_VERSION "1.0"
+#define HDLC_VERSION "1.2"
#include <linux/version.h>
#include <linux/config.h>
{
struct n_hdlc *n_hdlc = tty2n_hdlc (tty);
int error = 0;
+ int count;
+ unsigned long flags;
if (debuglevel >= DEBUG_LEVEL_INFO)
printk("%s(%d)n_hdlc_tty_ioctl() called %d\n",
switch (cmd) {
case FIONREAD:
- {
- /* report count of read data available */
- /* in next available frame (if any) */
- int count;
- unsigned long flags;
- spin_lock_irqsave(&n_hdlc->rx_buf_list.spinlock,flags);
- if (n_hdlc->rx_buf_list.head)
- count = n_hdlc->rx_buf_list.head->count;
- else
- count = 0;
- spin_unlock_irqrestore(&n_hdlc->rx_buf_list.spinlock,flags);
- PUT_USER (error, count, (int *) arg);
- }
+ /* report count of read data available */
+ /* in next available frame (if any) */
+ spin_lock_irqsave(&n_hdlc->rx_buf_list.spinlock,flags);
+ if (n_hdlc->rx_buf_list.head)
+ count = n_hdlc->rx_buf_list.head->count;
+ else
+ count = 0;
+ spin_unlock_irqrestore(&n_hdlc->rx_buf_list.spinlock,flags);
+ PUT_USER (error, count, (int *) arg);
break;
-
+
+ case TIOCOUTQ:
+ /* get the pending tx byte count in the driver */
+ count = tty->driver.chars_in_buffer ?
+ tty->driver.chars_in_buffer(tty) : 0;
+ /* add size of next output frame in queue */
+ spin_lock_irqsave(&n_hdlc->tx_buf_list.spinlock,flags);
+ if (n_hdlc->tx_buf_list.head)
+ count += n_hdlc->tx_buf_list.head->count;
+ spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock,flags);
+ PUT_USER (error, count, (int*)arg);
+ break;
+
default:
error = n_tty_ioctl (tty, file, cmd, arg);
break;
/*
* linux/drivers/char/synclink.c
*
+ * ==FILEDATE 19990610==
+ *
* Device driver for Microgate SyncLink ISA and PCI
* high speed multiprotocol serial adapters.
*
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-
+
#define VERSION(ver,rel,seq) (((ver)<<16) | ((rel)<<8) | (seq))
#define BREAKPOINT() asm(" int $3");
#define MAX_ISA_DEVICES 10
-#include <linux/config.h>
+#include <linux/config.h>
#include <linux/module.h>
+#include <linux/version.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/malloc.h>
-#if LINUX_VERSION_CODE >= VERSION(2,1,0)
+#if LINUX_VERSION_CODE >= VERSION(2,1,0)
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <asm/serial.h>
} BH_EVENT, *BH_QUEUE; /* Queue of BH actions to be done. */
#define MAX_BH_QUEUE_ENTRIES 200
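+/* if one input signal latches this many status events before the BH
+ * can service them, assume the signal is floating and shut off its
+ * status IRQ (see the *_chkcount handling below)
+ */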
+#define IO_PIN_SHUTDOWN_LIMIT (MAX_BH_QUEUE_ENTRIES/4)
#define RELEVANT_IFLAG(iflag) (iflag & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK))
+
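+/* counts of transitions on the modem input signals, maintained by the
+ * ISR and compared by mgsl_wait_event() to detect signal changes
+ */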
+struct _input_signal_events {
+ int ri_up;
+ int ri_down;
+ int dsr_up;
+ int dsr_down;
+ int dcd_up;
+ int dcd_down;
+ int cts_up;
+ int cts_down;
+};
+
/*
* Device instance data structure
*/
int bh_running; /* Protection from multiple */
int isr_overflow;
int bh_requested;
+
+ int dcd_chkcount; /* check counts to prevent */
+ int cts_chkcount; /* too many IRQs if a signal */
+ int dsr_chkcount; /* is floating */
+ int ri_chkcount;
char *buffer_list; /* virtual address of Rx & Tx buffer lists */
unsigned long buffer_list_phys;
char flag_buf[HDLC_MAX_FRAME_SIZE];
char char_buf[HDLC_MAX_FRAME_SIZE];
BOOLEAN drop_rts_on_tx_done;
+
+ BOOLEAN loopmode_insert_requested;
+ BOOLEAN loopmode_send_done_requested;
+
+ struct _input_signal_events input_signal_events;
};
#define MGSL_MAGIC 0x5401
void mgsl_tx_timeout(unsigned long context);
+
+void usc_loopmode_cancel_transmit( struct mgsl_struct * info );
+void usc_loopmode_insert_request( struct mgsl_struct * info );
+int usc_loopmode_active( struct mgsl_struct * info);
+void usc_loopmode_send_done( struct mgsl_struct * info );
+int usc_loopmode_send_active( struct mgsl_struct * info );
+
/*
* Defines a BUS descriptor value for the PCI adapter
* local bus address ranges.
static int mgsl_txenable(struct mgsl_struct * info, int enable);
static int mgsl_txabort(struct mgsl_struct * info);
static int mgsl_rxenable(struct mgsl_struct * info, int enable);
-static int mgsl_wait_event(struct mgsl_struct * info, int mask);
+static int mgsl_wait_event(struct mgsl_struct * info, int * mask);
+static int mgsl_loopmode_send_done( struct mgsl_struct * info );
#define jiffies_from_ms(a) ((((a) * HZ)/1000)+1)
#endif
static char *driver_name = "SyncLink serial driver";
-static char *driver_version = "1.00";
+static char *driver_version = "1.7";
static struct tty_driver serial_driver, callout_driver;
static int serial_refcount;
/* As a safety measure, mark the end of the chain with a NULL */
info->free_bh_queue_tail->link = NULL;
+ info->isr_overflow=0;
} /* end of mgsl_format_bh_queue() */
spin_unlock_irqrestore(&info->irq_spinlock,flags);
return 1;
}
+
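+	/* the ISR disables interrupts when its BH event queue
+	 * overflows; now that the BH has drained the queue,
+	 * re-enable them
+	 */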
+ if ( info->isr_overflow ) {
+ if (debug_level >= DEBUG_LEVEL_BH)
+ printk("ISR overflow cleared.\n");
+ info->isr_overflow=0;
+ usc_EnableMasterIrqBit(info);
+ usc_EnableDmaInterrupts(info,DICR_MASTER);
+ }
/* Mark BH routine as complete */
info->bh_running = 0;
}
}
- if ( info->isr_overflow ) {
- printk("ISR overflow detected.\n");
- }
-
if ( debug_level >= DEBUG_LEVEL_BH )
printk( "%s(%d):mgsl_bh_handler(%s) exit\n",
__FILE__,__LINE__,info->device_name);
void mgsl_bh_transmit_data( struct mgsl_struct *info, unsigned short Datacount )
{
struct tty_struct *tty = info->tty;
+ unsigned long flags;
if ( debug_level >= DEBUG_LEVEL_BH )
printk( "%s(%d):mgsl_bh_transmit_data() entry on %s\n",
}
wake_up_interruptible(&tty->write_wait);
}
-
+
+ /* if transmitter idle and loopmode_send_done_requested
+ * then start echoing RxD to TxD
+ */
+ spin_lock_irqsave(&info->irq_spinlock,flags);
+ if ( !info->tx_active && info->loopmode_send_done_requested )
+ usc_loopmode_send_done( info );
+ spin_unlock_irqrestore(&info->irq_spinlock,flags);
+
} /* End Of mgsl_bh_transmit_data() */
/* mgsl_bh_status_handler()
printk( "%s(%d):mgsl_bh_status_handler() entry on %s\n",
__FILE__,__LINE__,info->device_name);
+ if (status & MISCSTATUS_RI_LATCHED) {
+ if (info->ri_chkcount)
+ (info->ri_chkcount)--;
+ }
+ if (status & MISCSTATUS_DSR_LATCHED) {
+ if (info->dsr_chkcount)
+ (info->dsr_chkcount)--;
+ }
+ if (status & MISCSTATUS_DCD_LATCHED) {
+ if (info->dcd_chkcount)
+ (info->dcd_chkcount)--;
+ }
+ if (status & MISCSTATUS_CTS_LATCHED) {
+ if (info->cts_chkcount)
+ (info->cts_chkcount)--;
+ }
+
} /* End Of mgsl_bh_status_handler() */
/* mgsl_isr_receive_status()
printk("%s(%d):mgsl_isr_receive_status status=%04X\n",
__FILE__,__LINE__,status);
- usc_ClearIrqPendingBits( info, RECEIVE_STATUS );
- usc_UnlatchRxstatusBits( info, status );
+ if ( (status & RXSTATUS_ABORT_RECEIVED) &&
+ info->loopmode_insert_requested &&
+ usc_loopmode_active(info) )
+ {
+ ++info->icount.rxabort;
+ info->loopmode_insert_requested = FALSE;
+
+ /* clear CMR:13 to start echoing RxD to TxD */
+ info->cmr_value &= ~BIT13;
+ usc_OutReg(info, CMR, info->cmr_value);
+
+ /* disable received abort irq (no longer required) */
+ usc_OutReg(info, RICR,
+ (usc_InReg(info, RICR) & ~RXSTATUS_ABORT_RECEIVED));
+ }
if (status & (RXSTATUS_EXITED_HUNT + RXSTATUS_IDLE_RECEIVED)) {
if (status & RXSTATUS_EXITED_HUNT)
usc_RTCmd( info, RTCmd_PurgeRxFifo );
}
+ usc_ClearIrqPendingBits( info, RECEIVE_STATUS );
+ usc_UnlatchRxstatusBits( info, status );
+
} /* end of mgsl_isr_receive_status() */
/* mgsl_isr_transmit_status()
usc_ClearIrqPendingBits( info, TRANSMIT_STATUS );
usc_UnlatchTxstatusBits( info, status );
-
+
if ( status & TXSTATUS_EOF_SENT )
info->icount.txok++;
else if ( status & TXSTATUS_UNDERRUN )
MISCSTATUS_DSR_LATCHED | MISCSTATUS_RI_LATCHED) ) {
icount = &info->icount;
/* update input line counters */
- if (status & MISCSTATUS_RI_LATCHED)
+ if (status & MISCSTATUS_RI_LATCHED) {
+ if ((info->ri_chkcount)++ >= IO_PIN_SHUTDOWN_LIMIT)
+ usc_DisablestatusIrqs(info,SICR_RI);
icount->rng++;
- if (status & MISCSTATUS_DSR_LATCHED)
+ if ( status & MISCSTATUS_RI )
+ info->input_signal_events.ri_up++;
+ else
+ info->input_signal_events.ri_down++;
+ }
+ if (status & MISCSTATUS_DSR_LATCHED) {
+ if ((info->dsr_chkcount)++ >= IO_PIN_SHUTDOWN_LIMIT)
+ usc_DisablestatusIrqs(info,SICR_DSR);
icount->dsr++;
+ if ( status & MISCSTATUS_DSR )
+ info->input_signal_events.dsr_up++;
+ else
+ info->input_signal_events.dsr_down++;
+ }
if (status & MISCSTATUS_DCD_LATCHED) {
+ if ((info->dcd_chkcount)++ >= IO_PIN_SHUTDOWN_LIMIT)
+ usc_DisablestatusIrqs(info,SICR_DCD);
icount->dcd++;
+ if ( status & MISCSTATUS_DCD )
+ info->input_signal_events.dcd_up++;
+ else
+ info->input_signal_events.dcd_down++;
#ifdef CONFIG_HARD_PPS
if ((info->flags & ASYNC_HARDPPS_CD) &&
(status & MISCSTATUS_DCD_LATCHED))
#endif
}
if (status & MISCSTATUS_CTS_LATCHED)
+ {
+ if ((info->cts_chkcount)++ >= IO_PIN_SHUTDOWN_LIMIT)
+ usc_DisablestatusIrqs(info,SICR_CTS);
icount->cts++;
+ if ( status & MISCSTATUS_CTS )
+ info->input_signal_events.cts_up++;
+ else
+ info->input_signal_events.cts_down++;
+ }
wake_up_interruptible(&info->status_event_wait_q);
wake_up_interruptible(&info->event_wait_q);
}
}
+ mgsl_bh_queue_put(info, BH_TYPE_STATUS, status);
+
/* for diagnostics set IRQ flag */
if ( status & MISCSTATUS_TXC_LATCHED ){
usc_OutReg( info, SICR,
/* Post a receive event for BH processing. */
mgsl_bh_queue_put( info, BH_TYPE_RECEIVE_DMA, status );
- if ( status & BIT3 )
+ if ( status & BIT3 ) {
info->rx_overflow = 1;
+ info->icount.buf_overrun++;
+ }
} /* end of mgsl_isr_receive_dma() */
if ( info->isr_overflow ) {
printk(KERN_ERR"%s(%d):%s isr overflow irq=%d\n",
__FILE__,__LINE__,info->device_name, irq);
- /* Interrupt overflow. Reset adapter and exit. */
-// UscReset(info);
-// break;
+ usc_DisableMasterIrqBit(info);
+ usc_DisableDmaInterrupts(info,DICR_MASTER);
+ break;
}
}
usc_set_async_mode(info);
usc_set_serial_signals(info);
+
+ info->dcd_chkcount = 0;
+ info->cts_chkcount = 0;
+ info->ri_chkcount = 0;
+ info->dsr_chkcount = 0;
/* enable modem signal IRQs and read initial signal states */
usc_EnableStatusIrqs(info,SICR_CTS+SICR_DSR+SICR_DCD+SICR_RI);
if ( info->params.mode == MGSL_MODE_HDLC ) {
/* operating in synchronous (frame oriented) mode */
-
+
if (info->tx_active) {
ret = 0; goto cleanup;
}
-
+
+ /* if operating in HDLC LoopMode and the adapter */
+ /* has yet to be inserted into the loop, we can't */
+ /* transmit */
+
+ if ( (info->params.flags & HDLC_FLAG_HDLC_LOOPMODE) &&
+ !usc_loopmode_active(info) )
+ {
+ ret = 0;
+ goto cleanup;
+ }
+
if ( info->xmit_cnt ) {
/* Send accumulated from send_char() calls */
/* as frame and wait before accepting more data. */
ret = 0;
-
+
/* copy data from circular xmit_buf to */
/* transmit DMA buffer. */
mgsl_load_tx_dma_buffer(info,
spin_lock_irqsave(&info->irq_spinlock,flags);
if ( enable ) {
- if ( !info->tx_enabled )
+ if ( !info->tx_enabled ) {
+
usc_start_transmitter(info);
+ /*--------------------------------------------------
+ * if HDLC/SDLC Loop mode, attempt to insert the
+ * station in the 'loop' by setting CMR:13. Upon
+ * receipt of the next GoAhead (RxAbort) sequence,
+ * the OnLoop indicator (CCSR:7) should go active
+ * to indicate that we are on the loop
+ *--------------------------------------------------*/
+ if ( info->params.flags & HDLC_FLAG_HDLC_LOOPMODE )
+ usc_loopmode_insert_request( info );
+ }
} else {
if ( info->tx_enabled )
usc_stop_transmitter(info);
spin_lock_irqsave(&info->irq_spinlock,flags);
if ( info->tx_active && info->params.mode == MGSL_MODE_HDLC )
- usc_TCmd(info,TCmd_SendAbort);
+ {
+ if ( info->params.flags & HDLC_FLAG_HDLC_LOOPMODE )
+ usc_loopmode_cancel_transmit( info );
+ else
+ usc_TCmd(info,TCmd_SendAbort);
+ }
spin_unlock_irqrestore(&info->irq_spinlock,flags);
return 0;
/* mgsl_wait_event() wait for specified event to occur
*
* Arguments: info pointer to device instance data
- * mask bitmask of events to wait for
- * Return Value: bit mask of triggering event, otherwise error code
+ * mask pointer to bitmask of events to wait for
+ *	Return Value:	0 if successful, with the bit mask updated to
+ *			the set of events triggered,
+ *			otherwise error code
*/
-static int mgsl_wait_event(struct mgsl_struct * info, int mask)
+static int mgsl_wait_event(struct mgsl_struct * info, int * mask_ptr)
{
unsigned long flags;
int s;
int rc=0;
u16 regval;
struct mgsl_icount cprev, cnow;
+ int events = 0;
+ int mask;
+ struct _input_signal_events signal_events_prev, signal_events_now;
+
+ COPY_FROM_USER(rc,&mask, mask_ptr, sizeof(int));
+ if (rc) {
+ return -EFAULT;
+ }
if (debug_level >= DEBUG_LEVEL_INFO)
printk("%s(%d):mgsl_wait_event(%s,%d)\n", __FILE__,__LINE__,
info->device_name, mask);
-
+
spin_lock_irqsave(&info->irq_spinlock,flags);
-
+
+ usc_get_serial_signals(info);
+ s = info->serial_signals;
+
/* note the counters on entry */
cprev = info->icount;
+ signal_events_prev = info->input_signal_events;
if (mask & MgslEvent_ExitHuntMode) {
/* enable exit hunt mode IRQ */
spin_unlock_irqrestore(&info->irq_spinlock,flags);
- while(!rc) {
+	/* Determine if any user-requested events for input signals are currently TRUE */
+
+ events |= (mask & ((s & SerialSignal_DSR) ?
+ MgslEvent_DsrActive:MgslEvent_DsrInactive));
+
+ events |= (mask & ((s & SerialSignal_DCD) ?
+ MgslEvent_DcdActive:MgslEvent_DcdInactive));
+
+ events |= (mask & ((s & SerialSignal_CTS) ?
+ MgslEvent_CtsActive:MgslEvent_CtsInactive));
+
+ events |= (mask & ((s & SerialSignal_RI) ?
+ MgslEvent_RiActive:MgslEvent_RiInactive));
+
+ while(!events) {
/* sleep until event occurs */
interruptible_sleep_on(&info->event_wait_q);
}
spin_lock_irqsave(&info->irq_spinlock,flags);
+
/* get icount and serial signal states */
cnow = info->icount;
- s = info->serial_signals;
+ signal_events_now = info->input_signal_events;
spin_unlock_irqrestore(&info->irq_spinlock,flags);
+
+ if (signal_events_now.dsr_up != signal_events_prev.dsr_up &&
+ mask & MgslEvent_DsrActive )
+ events |= MgslEvent_DsrActive;
- rc = 0;
+ if (signal_events_now.dsr_down != signal_events_prev.dsr_down &&
+ mask & MgslEvent_DsrInactive )
+ events |= MgslEvent_DsrInactive;
+
+ if (signal_events_now.dcd_up != signal_events_prev.dcd_up &&
+ mask & MgslEvent_DcdActive )
+ events |= MgslEvent_DcdActive;
- if (cnow.dsr != cprev.dsr)
- rc |= (mask & ((s & SerialSignal_DSR) ?
- MgslEvent_DsrActive:MgslEvent_DsrInactive));
+ if (signal_events_now.dcd_down != signal_events_prev.dcd_down &&
+ mask & MgslEvent_DcdInactive )
+ events |= MgslEvent_DcdInactive;
+
+ if (signal_events_now.cts_up != signal_events_prev.cts_up &&
+ mask & MgslEvent_CtsActive )
+ events |= MgslEvent_CtsActive;
+
+ if (signal_events_now.cts_down != signal_events_prev.cts_down &&
+ mask & MgslEvent_CtsInactive )
+ events |= MgslEvent_CtsInactive;
+
+ if (signal_events_now.ri_up != signal_events_prev.ri_up &&
+ mask & MgslEvent_RiActive )
+ events |= MgslEvent_RiActive;
+
+ if (signal_events_now.ri_down != signal_events_prev.ri_down &&
+ mask & MgslEvent_RiInactive )
+ events |= MgslEvent_RiInactive;
- if (cnow.dcd != cprev.dcd)
- rc |= (mask & ((s & SerialSignal_DCD) ?
- MgslEvent_DcdActive:MgslEvent_DcdInactive));
-
- if (cnow.cts != cprev.cts)
- rc |= (mask & ((s & SerialSignal_CTS) ?
- MgslEvent_CtsActive:MgslEvent_CtsInactive));
-
- if (cnow.rng != cprev.rng)
- rc |= (mask & ((s & SerialSignal_RI) ?
- MgslEvent_RiActive:MgslEvent_RiInactive));
-
if (cnow.exithunt != cprev.exithunt)
- rc |= (mask & MgslEvent_ExitHuntMode);
-
+ events |= (mask & MgslEvent_ExitHuntMode);
+
if (cnow.rxidle != cprev.rxidle)
- rc |= (mask & MgslEvent_ExitHuntMode);
-
- if (!rc)
- rc = -EIO; /* no change => error */
-
+ events |= (mask & MgslEvent_IdleReceived);
+
cprev = cnow;
+ signal_events_prev = signal_events_now;
}
if (mask & (MgslEvent_ExitHuntMode + MgslEvent_IdleReceived)) {
}
spin_unlock_irqrestore(&info->irq_spinlock,flags);
}
-
+
+ if ( rc == 0 )
+ PUT_USER(rc, events, mask_ptr);
+
return rc;
} /* end of mgsl_wait_event() */
if (debug_level >= DEBUG_LEVEL_INFO)
printk("%s(%d):mgsl_get_modem_info %s value=%08X\n",
- __FILE__,__LINE__, info->device_name, *value );
+ __FILE__,__LINE__, info->device_name, result );
PUT_USER(err,result,value);
return err;
case MGSL_IOCGSTATS:
return mgsl_get_stats(info,(struct mgsl_icount*)arg);
case MGSL_IOCWAITEVENT:
- return mgsl_wait_event(info,(int)arg);
+ return mgsl_wait_event(info,(int*)arg);
+ case MGSL_IOCLOOPTXDONE:
+ return mgsl_loopmode_send_done(info);
case MGSL_IOCCLRMODCOUNT:
while(MOD_IN_USE)
MOD_DEC_USE_COUNT;
}
spin_unlock_irqrestore(&info->irq_spinlock,flags);
-#if 0 && LINUX_VERSION_CODE >= VERSION(2,1,0)
- ret += sprintf(buf+ret, "irq_spinlock=%08X\n",
- info->irq_spinlock.lock );
-#endif
-
return ret;
} /* end of line_info() */
if ( PCIBIOS_SUCCESSFUL == pcibios_find_device(
MICROGATE_VENDOR_ID, SYNCLINK_DEVICE_ID, i, &bus, &func) ) {
+#if LINUX_VERSION_CODE >= VERSION(2,1,0)
+ struct pci_dev *pdev = pci_find_slot(bus,func);
+ irq_line = pdev->irq;
+#else
+ if (pcibios_read_config_byte(bus,func,
+ PCI_INTERRUPT_LINE,&irq_line) ) {
+			printk( "%s(%d):Interrupt line not set.\n",
+ __FILE__,__LINE__);
+ continue;
+ }
+#endif
+
if (pcibios_read_config_dword(bus,func,
PCI_BASE_ADDRESS_3,&shared_mem_base) ) {
printk( "%s(%d):Shared mem addr not set.\n",
continue;
}
- if (pcibios_read_config_byte(bus,func,
- PCI_INTERRUPT_LINE,&irq_line) ) {
- printk( "%s(%d):USC I/O addr not set.\n",
- __FILE__,__LINE__);
- continue;
- }
-
info = mgsl_allocate_device();
if ( !info ) {
/* error allocating device instance data */
{
u16 RegValue;
- /* Channel mode Register (CMR)
- *
- * <15..14> 00 Tx Sub modes, Underrun Action
- * <13> 0 1 = Send Preamble before opening flag
- * <12> 0 1 = Consecutive Idles share common 0
- * <11..8> 0110 Transmitter mode = HDLC/SDLC
- * <7..4> 0000 Rx Sub modes, addr/ctrl field handling
- * <3..0> 0110 Receiver mode = HDLC/SDLC
- *
- * 0000 0110 0000 0110 = 0x0606
- */
+ if ( info->params.flags & HDLC_FLAG_HDLC_LOOPMODE )
+ {
+ /*
+ ** Channel Mode Register (CMR)
+ **
+ ** <15..14> 10 Tx Sub Modes, Send Flag on Underrun
+ ** <13> 0 0 = Transmit Disabled (initially)
+ ** <12> 0 1 = Consecutive Idles share common 0
+ ** <11..8> 1110 Transmitter Mode = HDLC/SDLC Loop
+ ** <7..4> 0000 Rx Sub Modes, addr/ctrl field handling
+ ** <3..0> 0110 Receiver Mode = HDLC/SDLC
+ **
+ ** 1000 1110 0000 0110 = 0x8e06
+ */
+ RegValue = 0x8e06;
+
+ /*--------------------------------------------------
+ * ignore user options for UnderRun Actions and
+ * preambles
+ *--------------------------------------------------*/
+ }
+ else
+ {
+ /* Channel mode Register (CMR)
+ *
+ * <15..14> 00 Tx Sub modes, Underrun Action
+ * <13> 0 1 = Send Preamble before opening flag
+ * <12> 0 1 = Consecutive Idles share common 0
+ * <11..8> 0110 Transmitter mode = HDLC/SDLC
+ * <7..4> 0000 Rx Sub modes, addr/ctrl field handling
+ * <3..0> 0110 Receiver mode = HDLC/SDLC
+ *
+ * 0000 0110 0000 0110 = 0x0606
+ */
- RegValue = 0x0606;
+ RegValue = 0x0606;
- if ( info->params.flags & HDLC_FLAG_UNDERRUN_ABORT15 )
- RegValue |= BIT14;
- else if ( info->params.flags & HDLC_FLAG_UNDERRUN_FLAG )
- RegValue |= BIT15;
- else if ( info->params.flags & HDLC_FLAG_UNDERRUN_CRC )
- RegValue |= BIT15 + BIT14;
+ if ( info->params.flags & HDLC_FLAG_UNDERRUN_ABORT15 )
+ RegValue |= BIT14;
+ else if ( info->params.flags & HDLC_FLAG_UNDERRUN_FLAG )
+ RegValue |= BIT15;
+ else if ( info->params.flags & HDLC_FLAG_UNDERRUN_CRC )
+ RegValue |= BIT15 + BIT14;
- if ( info->params.preamble != HDLC_PREAMBLE_PATTERN_NONE )
- RegValue |= BIT13;
+ if ( info->params.preamble != HDLC_PREAMBLE_PATTERN_NONE )
+ RegValue |= BIT13;
+ }
if ( info->params.flags & HDLC_FLAG_SHARE_ZERO )
RegValue |= BIT12;
RegValue |= 0x0003; /* RxCLK from DPLL */
else if ( info->params.flags & HDLC_FLAG_RXC_BRG )
RegValue |= 0x0004; /* RxCLK from BRG0 */
+ else if ( info->params.flags & HDLC_FLAG_RXC_TXCPIN)
+ RegValue |= 0x0006; /* RxCLK from TXC Input */
else
RegValue |= 0x0007; /* RxCLK from Port1 */
RegValue |= 0x0018; /* TxCLK from DPLL */
else if ( info->params.flags & HDLC_FLAG_TXC_BRG )
RegValue |= 0x0020; /* TxCLK from BRG0 */
+ else if ( info->params.flags & HDLC_FLAG_TXC_RXCPIN)
+		RegValue |= 0x0038;	/* TxCLK from RXC Input */
else
RegValue |= 0x0030; /* TxCLK from Port0 */
/* of rounding up and then subtracting 1 we just don't subtract */
/* the one in this case. */
- Tc = (u16)((XtalSpeed/DpllDivisor)/info->params.clock_speed);
- if ( !((((XtalSpeed/DpllDivisor) % info->params.clock_speed) * 2)
- / info->params.clock_speed) )
- Tc--;
+ /*--------------------------------------------------
+ * ejz: for DPLL mode, application should use the
+ * same clock speed as the partner system, even
+ * though clocking is derived from the input RxData.
+ * In case the user uses a 0 for the clock speed,
+	 * default to the maximum 16-bit time constant (0xffff)
+	 * and don't try to divide by zero
+ *--------------------------------------------------*/
+ if ( info->params.clock_speed )
+ {
+ Tc = (u16)((XtalSpeed/DpllDivisor)/info->params.clock_speed);
+ if ( !((((XtalSpeed/DpllDivisor) % info->params.clock_speed) * 2)
+ / info->params.clock_speed) )
+ Tc--;
+ }
+ else
+ Tc = -1;
+
/* Write 16-bit Time Constant for BRG1 */
usc_OutReg( info, TC1R, Tc );
if ( debug_level >= DEBUG_LEVEL_DATA )
mgsl_trace_block(info,Buffer,BufferSize,1);
+ if (info->params.flags & HDLC_FLAG_HDLC_LOOPMODE) {
+ /* set CMR:13 to start transmit when
+ * next GoAhead (abort) is received
+ */
+ info->cmr_value |= BIT13;
+ }
+
/* Setup the status and RCC (Frame Size) fields of the 1st */
/* buffer entry in the transmit DMA buffer list. */
unsigned int i;
BOOLEAN rc = TRUE;
unsigned long flags;
-
+
spin_lock_irqsave(&info->irq_spinlock,flags);
usc_reset(info);
spin_unlock_irqrestore(&info->irq_spinlock,flags);
usc_reset(info);
spin_unlock_irqrestore(&info->irq_spinlock,flags);
- if ( !info->irq_occurred )
+ if ( !info->irq_occurred )
return FALSE;
else
return TRUE;
volatile unsigned long EndTime;
unsigned long flags;
MGSL_PARAMS tmp_params;
-
+
/* save current port options */
memcpy(&tmp_params,&info->params,sizeof(MGSL_PARAMS));
/* load default port options */
/**********************************/
/* WAIT FOR TRANSMIT FIFO TO FILL */
/**********************************/
-
+
/* Wait 100ms */
EndTime = jiffies + jiffies_from_ms(100);
if ( rc == TRUE ){
/* CHECK FOR TRANSMIT ERRORS */
- if ( status & (BIT5 + BIT1) )
+ if ( status & (BIT5 + BIT1) )
rc = FALSE;
}
if(info->tx_active && info->params.mode == MGSL_MODE_HDLC) {
info->icount.txtimeout++;
}
-
spin_lock_irqsave(&info->irq_spinlock,flags);
info->tx_active = 0;
info->xmit_cnt = info->xmit_head = info->xmit_tail = 0;
+
+ if ( info->params.flags & HDLC_FLAG_HDLC_LOOPMODE )
+ usc_loopmode_cancel_transmit( info );
+
spin_unlock_irqrestore(&info->irq_spinlock,flags);
mgsl_bh_transmit_data(info,0);
} /* end of mgsl_tx_timeout() */
+/* signal that there are no more frames to send, so that the
+ * line is 'released' by echoing RxD to TxD when the current
+ * transmission is complete (or immediately if no tx is in progress).
+ */
+static int mgsl_loopmode_send_done( struct mgsl_struct * info )
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&info->irq_spinlock,flags);
+ if (info->params.flags & HDLC_FLAG_HDLC_LOOPMODE) {
+ if (info->tx_active)
+ info->loopmode_send_done_requested = TRUE;
+ else
+ usc_loopmode_send_done(info);
+ }
+ spin_unlock_irqrestore(&info->irq_spinlock,flags);
+
+ return 0;
+}
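+
+/* illustrative user-space usage (an assumption, not part of this
+ * driver): after writing the last frame of a loop-mode session the
+ * application releases the loop with
+ *
+ *	ioctl(fd, MGSL_IOCLOOPTXDONE, 0);
+ *
+ * where fd is an open descriptor for the SyncLink tty device
+ */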
+
+/* release the line by echoing RxD to TxD
+ * upon completion of a transmit frame
+ */
+void usc_loopmode_send_done( struct mgsl_struct * info )
+{
+ info->loopmode_send_done_requested = FALSE;
+ /* clear CMR:13 to 0 to start echoing RxData to TxData */
+ info->cmr_value &= ~BIT13;
+ usc_OutReg(info, CMR, info->cmr_value);
+}
+
+/* abort a transmit in progress while in HDLC LoopMode
+ */
+void usc_loopmode_cancel_transmit( struct mgsl_struct * info )
+{
+ /* reset tx dma channel and purge TxFifo */
+ usc_RTCmd( info, RTCmd_PurgeTxFifo );
+ usc_DmaCmd( info, DmaCmd_ResetTxChannel );
+ usc_loopmode_send_done( info );
+}
+
+/* for HDLC/SDLC LoopMode, setting CMR:13 after the transmitter is enabled
+ * is an Insert Into Loop action. Upon receipt of a GoAhead sequence (RxAbort)
+ * we must clear CMR:13 to begin echoing RxData to TxData
+ */
+void usc_loopmode_insert_request( struct mgsl_struct * info )
+{
+ info->loopmode_insert_requested = TRUE;
+
+ /* enable RxAbort irq. On next RxAbort, clear CMR:13 to
+	 * begin echoing RxData to TxData (complete insertion)
+ */
+ usc_OutReg( info, RICR,
+ (usc_InReg( info, RICR ) | RXSTATUS_ABORT_RECEIVED ) );
+
+ /* set CMR:13 to insert into loop on next GoAhead (RxAbort) */
+ info->cmr_value |= BIT13;
+ usc_OutReg(info, CMR, info->cmr_value);
+}
+
+/* return 1 if station is inserted into the loop, otherwise 0
+ */
+int usc_loopmode_active( struct mgsl_struct * info)
+{
+ return usc_InReg( info, CCSR ) & BIT7 ? 1 : 0 ;
+}
+
+/* return 1 if USC is in loop send mode, otherwise 0
+ */
+int usc_loopmode_send_active( struct mgsl_struct * info )
+{
+ return usc_InReg( info, CCSR ) & BIT6 ? 1 : 0 ;
+}
+
usb_acm_init();
# endif
# ifdef CONFIG_USB_PRINTER
- usb_print_init();
+ usb_printer_init();
# endif
# ifdef CONFIG_USB_CPIA
usb_cpia_init();
*/
static inline void write_vga(unsigned char reg, unsigned int val)
{
-#ifndef SLOW_VGA
unsigned int v1, v2;
+ unsigned long flags;
+
+ /*
+ * ddprintk might set the console position from interrupt
+ * handlers, thus the write has to be IRQ-atomic.
+ */
+ save_flags(flags);
+ cli();
+#ifndef SLOW_VGA
v1 = reg + (val & 0xff00);
v2 = reg + 1 + ((val << 8) & 0xff00);
outw(v1, vga_video_port_reg);
outb_p(reg+1, vga_video_port_reg);
outb_p(val & 0xff, vga_video_port_val);
#endif
+ restore_flags(flags);
}
__initfunc(static const char *vgacon_startup(void))
int dummy3; /* unused */
} b_un;
unsigned int data[N_PARAM];
-} bdf_prm = {{40, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884, 2}};
+} bdf_prm = {{90, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884, 2}};
/* These are the min and max parameter values that we will allow to be assigned */
int bdflush_min[N_PARAM] = { 0, 10, 5, 25, 0, 1*HZ, 1*HZ, 1, 1};
-int bdflush_max[N_PARAM] = {100,5000, 2000, 2000,100, 600*HZ, 600*HZ, 2047, 5};
+int bdflush_max[N_PARAM] = {100,50000, 20000, 20000,1000, 6000*HZ, 6000*HZ, 2047, 5};
void wakeup_bdflush(int);
static inline void remove_from_lru_list(struct buffer_head * bh)
{
- if (!(bh->b_prev_free) || !(bh->b_next_free))
- panic("VFS: LRU block list corrupted");
- if (bh->b_dev == B_FREE)
- panic("LRU list corrupted");
+ if (!(bh->b_prev_free) || !(bh->b_next_free)) {
+ printk("VFS: LRU block list corrupted\n");
+ *(int*)0 = 0;
+ }
+ if (bh->b_dev == B_FREE) {
+ printk("LRU list corrupted");
+ *(int*)0 = 0;
+ }
bh->b_prev_free->b_next_free = bh->b_next_free;
bh->b_next_free->b_prev_free = bh->b_prev_free;
if(bh->b_dev == B_FREE) {
remove_from_free_list(bh); /* Free list entries should not be
in the hash queue */
- return;
+ goto out;
}
nr_buffers_type[bh->b_list]--;
remove_from_hash_queue(bh);
remove_from_lru_list(bh);
+out:	;
}
static inline void put_last_free(struct buffer_head * bh)
}
}
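+/* link a buffer into the circular BUF_DIRTY lru list; the buffer must
+ * not already be on a free or lru list (b_next_free must be NULL)
+ */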
+static void insert_into_dirty_queue(struct buffer_head * bh)
+{
+ struct buffer_head **bhp;
+
+ bhp = &lru_list[BUF_DIRTY];
+ if(!*bhp) {
+ *bhp = bh;
+ bh->b_prev_free = bh;
+ }
+ if (bh->b_next_free)
+ BUG();
+
+ bh->b_next_free = *bhp;
+ bh->b_prev_free = (*bhp)->b_prev_free;
+ (*bhp)->b_prev_free->b_next_free = bh;
+ (*bhp)->b_prev_free = bh;
+
+ nr_buffers++;
+ nr_buffers_type[BUF_DIRTY]++;
+}
+
struct buffer_head * find_buffer(kdev_t dev, int block, int size)
{
struct buffer_head * next;
if (!buffer_dirty(bh)) {
bh->b_flushtime = 0;
}
- return bh;
+ goto out;
}
isize = BUFSIZE_INDEX(size);
init_buffer(bh, dev, block, end_buffer_io_sync, NULL);
bh->b_state=0;
insert_into_queues(bh);
- return bh;
+ goto out;
/*
* If we block while refilling the free list, somebody may
if (!find_buffer(dev,block,size))
goto get_free;
goto repeat;
+out:
+ return bh;
}
void set_writetime(struct buffer_head * buf, int flag)
/*
* Put a buffer into the appropriate list, without side-effects.
*/
-static inline void file_buffer(struct buffer_head *bh, int list)
+static void file_buffer(struct buffer_head *bh, int list)
{
remove_from_queues(bh);
bh->b_list = list;
insert_into_queues(bh);
}
+/*
+ * if a new dirty buffer is created we need to balance bdflush.
+ */
+static inline void balance_dirty (kdev_t dev)
+{
+ int too_many = (nr_buffers * bdf_prm.b_un.nfract/100);
+
+ /* This buffer is dirty, maybe we need to start flushing.
+ * If too high a percentage of the buffers are dirty...
+ */
+ if (nr_buffers_type[BUF_DIRTY] > too_many) {
+ wakeup_bdflush(1);
+ }
+
+ /* If this is a loop device, and
+ * more than half of the buffers are dirty...
+ * (Prevents no-free-buffers deadlock with loop device.)
+ */
+ if (MAJOR(dev) == LOOP_MAJOR &&
+ nr_buffers_type[BUF_DIRTY]*2>nr_buffers)
+ wakeup_bdflush(1);
+}
+
/*
* A buffer may need to be moved from one buffer list to another
* (e.g. in case it is not shared any more). Handle this.
*/
-void refile_buffer(struct buffer_head * buf)
+void __refile_buffer(struct buffer_head * buf)
{
int dispose;
dispose = BUF_CLEAN;
if(dispose != buf->b_list) {
file_buffer(buf, dispose);
- if(dispose == BUF_DIRTY) {
- int too_many = (nr_buffers * bdf_prm.b_un.nfract/100);
-
- /* This buffer is dirty, maybe we need to start flushing.
- * If too high a percentage of the buffers are dirty...
- */
- if (nr_buffers_type[BUF_DIRTY] > too_many)
- wakeup_bdflush(1);
-
- /* If this is a loop device, and
- * more than half of the buffers are dirty...
- * (Prevents no-free-buffers deadlock with loop device.)
- */
- if (MAJOR(buf->b_dev) == LOOP_MAJOR &&
- nr_buffers_type[BUF_DIRTY]*2>nr_buffers)
- wakeup_bdflush(1);
- }
+ if(dispose == BUF_DIRTY)
+ balance_dirty(buf->b_dev);
}
}
if (buf->b_count) {
buf->b_count--;
+ wake_up(&buffer_wait);
return;
}
printk("VFS: brelse: Trying to free free buffer\n");
return;
}
- memset(bh,0,sizeof(*bh));
+ memset(bh, 0, sizeof(*bh));
init_waitqueue_head(&bh->b_wait);
nr_unused_buffer_heads++;
bh->b_next_free = unused_list;
struct page *page;
mark_buffer_uptodate(bh, uptodate);
- unlock_buffer(bh);
/* This is a temporary buffer used for page I/O. */
page = mem_map + MAP_NR(bh->b_data);
- if (!PageLocked(page))
- goto not_locked;
- if (bh->b_count != 1)
- goto bad_count;
- if (!test_bit(BH_Uptodate, &bh->b_state))
- set_bit(PG_error, &page->flags);
+ if (!uptodate)
+ SetPageError(page);
/*
* Be _very_ careful from here on. Bad things can happen if
*/
save_flags(flags);
cli();
- bh->b_count--;
- tmp = bh;
- do {
- if (tmp->b_count)
+ unlock_buffer(bh);
+ tmp = bh->b_this_page;
+ while (tmp != bh) {
+ if (buffer_locked(tmp)) {
goto still_busy;
+ }
tmp = tmp->b_this_page;
- } while (tmp != bh);
+ }
/* OK, the async IO on this page is complete. */
- free_async_buffers(bh);
restore_flags(flags);
- clear_bit(PG_locked, &page->flags);
- wake_up(&page->wait);
+
after_unlock_page(page);
+ /*
+ * if none of the buffers had errors then we can set the
+ * page uptodate:
+ */
+ if (!PageError(page))
+ SetPageUptodate(page);
+	page->owner = (int)current;	/* HACK, FIXME, will go away. */
+ UnlockPage(page);
+
return;
still_busy:
restore_flags(flags);
return;
-
-not_locked:
- printk ("Whoops: end_buffer_io_async: async io complete on unlocked page\n");
- return;
-
-bad_count:
- printk ("Whoops: end_buffer_io_async: b_count != 1 on async io.\n");
- return;
}
-/*
- * Start I/O on a page.
- * This function expects the page to be locked and may return before I/O is complete.
- * You then have to check page->locked, page->uptodate, and maybe wait on page->wait.
- */
-int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size, int bmap)
+static int create_page_buffers (int rw, struct page *page, kdev_t dev, int b[], int size, int bmap)
{
- struct buffer_head *bh, *prev, *next, *arr[MAX_BUF_PER_PAGE];
- int block, nr;
+ struct buffer_head *head, *bh, *tail;
+ int block;
if (!PageLocked(page))
- panic("brw_page: page not locked for I/O");
- clear_bit(PG_uptodate, &page->flags);
- clear_bit(PG_error, &page->flags);
+ BUG();
/*
* Allocate async buffer heads pointing to this page, just for I/O.
- * They do _not_ show up in the buffer hash table!
- * They are _not_ registered in page->buffers either!
+ * They show up in the buffer hash table and are registered in
+ * page->buffers.
*/
- bh = create_buffers(page_address(page), size, 1);
- if (!bh) {
- /* WSH: exit here leaves page->count incremented */
- clear_bit(PG_locked, &page->flags);
- wake_up(&page->wait);
- return -ENOMEM;
- }
- nr = 0;
- next = bh;
- do {
- struct buffer_head * tmp;
+ head = create_buffers(page_address(page), size, 1);
+ if (page->buffers)
+ BUG();
+ if (!head)
+ BUG();
+ tail = head;
+ for (bh = head; bh; bh = bh->b_this_page) {
block = *(b++);
- init_buffer(next, dev, block, end_buffer_io_async, NULL);
- set_bit(BH_Uptodate, &next->b_state);
+ tail = bh;
+ init_buffer(bh, dev, block, end_buffer_io_async, NULL);
/*
* When we use bmap, we define block zero to represent
* two cases.
*/
if (bmap && !block) {
- memset(next->b_data, 0, size);
- next->b_count--;
- continue;
+ set_bit(BH_Uptodate, &bh->b_state);
+ unlock_kernel();
+ memset(bh->b_data, 0, size);
+ lock_kernel();
+ } else {
+ struct buffer_head *alias = find_buffer(dev, block, size);
+ /*
+ * Tricky issue. It is legal to have an alias here,
+ * because the buffer-cache layer can increase the
+			 * b_count even if the buffer goes inactive
+ * meanwhile.
+ */
+ if (alias) {
+				printk("buffer %p has nonzero alias %p; hoping that it will go away.\n", bh, alias);
+ }
}
- tmp = get_hash_table(dev, block, size);
- if (tmp) {
- if (!buffer_uptodate(tmp)) {
- if (rw == READ)
- ll_rw_block(READ, 1, &tmp);
- wait_on_buffer(tmp);
+ }
+ tail->b_this_page = head;
+ page->buffers = head;
+ return 0;
+}
+
+/*
+ * Can the buffer be thrown out?
+ */
+#define BUFFER_BUSY_BITS ((1<<BH_Dirty) | (1<<BH_Lock) | (1<<BH_Protected))
+#define buffer_busy(bh) ((bh)->b_count || ((bh)->b_state & BUFFER_BUSY_BITS))
+
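+/* return 1 if every buffer on the page is unlocked and clean;
+ * return 0 if we had to sleep (or start a flush), in which case the
+ * caller must re-check the page from the start
+ */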
+static int page_idle(struct page *page, int sync)
+{
+ struct buffer_head *head, *bh, *next;
+
+ head = page->buffers;
+ bh = head;
+ do {
+ next = bh->b_this_page;
+
+ if (bh->b_blocknr) {
+ if (buffer_locked(bh)) {
+ wait_on_buffer(bh);
+ return 0;
}
- if (rw == READ)
- memcpy(next->b_data, tmp->b_data, size);
- else {
- memcpy(tmp->b_data, next->b_data, size);
- mark_buffer_dirty(tmp, 0);
+ if (buffer_dirty(bh)) {
+ if (sync) {
+ wait_on_buffer(bh);
+ ll_rw_block(WRITE, 1, &bh);
+ return 0;
+ } else
+ clear_bit(BH_Dirty, &bh->b_state);
}
- brelse(tmp);
- next->b_count--;
- continue;
}
- if (rw == READ)
- clear_bit(BH_Uptodate, &next->b_state);
- else
- set_bit(BH_Dirty, &next->b_state);
- arr[nr++] = next;
- } while (prev = next, (next = next->b_this_page) != NULL);
- prev->b_this_page = bh;
-
- if (nr) {
+ bh = next;
+ } while (bh != head);
+ return 1;
+}
+
+/*
+ * We don't have to release all buffers here, but
+ * we have to be sure that no dirty buffer is left
+ * and no IO is going on (no buffer is locked), because
+ * we are going to free the underlying page.
+ */
+int generic_block_flushpage(struct inode *inode, struct page *page, int sync)
+{
+ struct buffer_head *head, *bh, *next;
+
+ if (!PageLocked(page))
+ BUG();
+ if (!page->buffers)
+ BUG();
+
+ while (!page_idle(page, sync));
+
+ head = page->buffers;
+ bh = head;
+ do {
+ next = bh->b_this_page;
+ if (bh->b_blocknr) {
+ if(bh->b_dev == B_FREE) {
+ remove_from_free_list(bh);
+ } else {
+ if (bh->b_list == BUF_DIRTY) {
+ nr_buffers--;
+ nr_buffers_type[BUF_DIRTY]--;
+ remove_from_lru_list(bh);
+ }
+ }
+ } else {
+ }
+ bh->b_state = 0;
+ bh->b_count = 0;
+ put_unused_buffer_head(bh);
+ bh = next;
+ } while (bh != head);
+ page->buffers = NULL;
+
+ return 0;
+}
+
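+/* write `bytes' bytes from the user buffer `buf' into one page at
+ * `offset', allocating any missing blocks through the caller-supplied
+ * fs_getblock_t callback and filing the affected buffers on the
+ * dirty list
+ */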
+long block_write_one_page (struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf, fs_getblock_t fs_get_block)
+{
+ struct dentry *dentry = file->f_dentry;
+ struct inode *inode = dentry->d_inode;
+ unsigned long block;
+ int err, created;
+ unsigned long blocksize, start_block, end_block;
+ unsigned long start_offset, start_bytes, end_bytes;
+ unsigned long bbits, phys, blocks, i, len;
+ struct buffer_head *bh;
+ char * target_buf;
+
+ target_buf = (char *)page_address(page) + offset;
+ lock_kernel();
+
+ if (!PageLocked(page))
+ BUG();
+
+ blocksize = inode->i_sb->s_blocksize;
+ if (!page->buffers) {
+ struct buffer_head *head, *tail;
+
+ head = create_buffers(page_address(page), blocksize, 1);
+ if (page->buffers)
+ BUG();
+
+ bh = head;
+ do {
+ bh->b_dev = inode->i_dev;
+ tail = bh;
+ bh = bh->b_this_page;
+ } while (bh);
+ tail->b_this_page = head;
+ page->buffers = head;
+ }
+
+ bbits = inode->i_sb->s_blocksize_bits;
+ block = page->offset >> bbits;
+ blocks = PAGE_SIZE >> bbits;
+ start_block = offset >> bbits;
+ end_block = (offset + bytes - 1) >> bbits;
+ start_offset = offset & (blocksize - 1);
+ start_bytes = blocksize - start_offset;
+ if (start_bytes > bytes)
+ start_bytes = bytes;
+ end_bytes = (offset+bytes) & (blocksize - 1);
+ if (end_bytes > bytes)
+ end_bytes = bytes;
+
+ if (offset < 0 || offset >= PAGE_SIZE)
+ BUG();
+ if (bytes+offset < 0 || bytes+offset > PAGE_SIZE)
+ BUG();
+ if (start_block < 0 || start_block >= blocks)
+ BUG();
+ if (end_block < 0 || end_block >= blocks)
+ BUG();
+ // FIXME: currently we assume page alignment.
+ if (page->offset & (PAGE_SIZE-1))
+ BUG();
+
+ bh = page->buffers;
+ i = 0;
+ do {
+ if (!bh)
+ BUG();
+
+ if ((i < start_block) || (i > end_block)) {
+ goto skip;
+ }
+ if (!bh->b_blocknr) {
+ phys = fs_get_block (inode, block, 1, &err, &created);
+ err = -EIO;
+ if (!phys)
+ goto out;
+
+ unlock_kernel();
+ /*
+			 * if this is a partially written block which already
+			 * has contents on disk, we have to read it first.
+ */
+ if (!created && (start_offset ||
+ (end_bytes && (i == end_block)))) {
+ init_buffer(bh, inode->i_dev, phys, end_buffer_io_sync, NULL);
+ bh->b_state = 0;
+ ll_rw_block(READ, 1, &bh);
+ wait_on_buffer(bh);
+ err = -EIO;
+ if (!buffer_uptodate(bh))
+ goto out_nolock;
+ }
+ lock_kernel();
+
+ init_buffer(bh, inode->i_dev, phys, end_buffer_io_sync, NULL);
+ bh->b_state = (1<<BH_Dirty) | (1<<BH_Uptodate);
+ bh->b_list = BUF_DIRTY;
+ insert_into_dirty_queue(bh);
+ } else {
+ /*
+ * block already exists, just mark it dirty:
+ */
+ bh->b_end_io = end_buffer_io_sync;
+ set_bit(BH_Dirty, &bh->b_state);
+ set_bit(BH_Uptodate, &bh->b_state);
+ }
+ unlock_kernel();
+
+ err = -EFAULT;
+ if (start_offset) {
+ len = start_bytes;
+ start_offset = 0;
+ } else
+ if (end_bytes && (i == end_block)) {
+ len = end_bytes;
+ end_bytes = 0;
+ } else {
+ /*
+ * Overwritten block.
+ */
+ len = blocksize;
+ }
+ if (copy_from_user(target_buf, buf, len))
+ goto out_nolock;
+ target_buf += len;
+ buf += len;
+
+ lock_kernel();
+ if (bh->b_list != BUF_DIRTY) {
+ bh->b_list = BUF_DIRTY;
+ insert_into_dirty_queue(bh);
+ }
+skip:
+ i++;
+ block++;
+ bh = bh->b_this_page;
+ } while (i < blocks);
+ unlock_kernel();
+
+ SetPageUptodate(page);
+ return bytes;
+out:
+ unlock_kernel();
+out_nolock:
+ ClearPageUptodate(page);
+ return err;
+}
+
+/*
+ * Start I/O on a page.
+ * This function expects the page to be locked and may return
+ * before I/O is complete. You then have to check page->locked,
+ * page->uptodate, and maybe wait on page->wait.
+ */
+int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size, int bmap)
+{
+ struct buffer_head *head, *bh, *arr[MAX_BUF_PER_PAGE];
+ int nr, fresh, block;
+
+ if ((rw == READ) && !PageLocked(page))
+ panic("brw_page: page not locked for I/O");
+/*	clear_bit(PG_error, &page->flags); */
+ /*
+ * We pretty much rely on the page lock for this, because
+ * create_page_buffers() might sleep.
+ */
+ fresh = 0;
+ if (!page->buffers) {
+ create_page_buffers(rw, page, dev, b, size, bmap);
+ fresh = 1;
+ }
+ if (!page->buffers)
+ BUG();
+
+ head = page->buffers;
+ bh = head;
+ nr = 0;
+ do {
+ block = *(b++);
+
+ if (fresh && (bh->b_count != 1))
+ BUG();
+ if (rw == READ) {
+ if (!fresh)
+ BUG();
+ if (bmap && !block) {
+ if (block)
+ BUG();
+ } else {
+ if (bmap && !block)
+ BUG();
+ if (!buffer_uptodate(bh)) {
+ arr[nr++] = bh;
+ }
+ }
+ } else { /* WRITE */
+ if (!bh->b_blocknr) {
+ if (!block)
+ BUG();
+ bh->b_blocknr = block;
+ } else {
+ if (!block)
+ BUG();
+ }
+ set_bit(BH_Dirty, &bh->b_state);
+ set_bit(BH_Uptodate, &bh->b_state);
+ if (bh->b_list != BUF_DIRTY) {
+ bh->b_list = BUF_DIRTY;
+ insert_into_dirty_queue(bh);
+ }
+ }
+ bh = bh->b_this_page;
+ } while (bh != head);
+ if (rw == READ)
+ ++current->maj_flt;
+ if ((rw == READ) && nr) {
+ if (Page_Uptodate(page))
+ BUG();
+ unlock_kernel();
ll_rw_block(rw, nr, arr);
- /* The rest of the work is done in mark_buffer_uptodate()
- * and unlock_buffer(). */
+ lock_kernel();
} else {
- unsigned long flags;
- clear_bit(PG_locked, &page->flags);
- set_bit(PG_uptodate, &page->flags);
- wake_up(&page->wait);
- save_flags(flags);
- cli();
- free_async_buffers(bh);
- restore_flags(flags);
- after_unlock_page(page);
+ if (!nr && rw == READ) {
+ SetPageUptodate(page);
+ UnlockPage(page);
+ }
}
- ++current->maj_flt;
return 0;
}
{
if (on) {
struct buffer_head *tmp = bh;
+ struct page *page;
set_bit(BH_Uptodate, &bh->b_state);
/* If a page has buffers and all these buffers are uptodate,
* then the page is uptodate. */
return;
tmp=tmp->b_this_page;
} while (tmp && tmp != bh);
- set_bit(PG_uptodate, &mem_map[MAP_NR(bh->b_data)].flags);
+ page = mem_map + MAP_NR(bh->b_data);
+ SetPageUptodate(page);
return;
}
clear_bit(BH_Uptodate, &bh->b_state);
int *p, nr[PAGE_SIZE/512];
int i;
- atomic_inc(&page->count);
- set_bit(PG_locked, &page->flags);
- set_bit(PG_free_after, &page->flags);
+ if (page->buffers) {
+ printk("hm, no brw_page(%p) because IO already started.\n",
+ page);
+ goto out;
+ }
i = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
block = page->offset >> inode->i_sb->s_blocksize_bits;
/* IO start */
brw_page(READ, page, inode->i_dev, nr, inode->i_sb->s_blocksize, 1);
+out:
return 0;
}
tmp->b_next_free = tmp;
}
insert_point = tmp;
- ++nr_buffers;
+ nr_buffers++;
if (tmp->b_this_page)
tmp = tmp->b_this_page;
else
return 1;
}
-/*
- * Can the buffer be thrown out?
- */
-#define BUFFER_BUSY_BITS ((1<<BH_Dirty) | (1<<BH_Lock) | (1<<BH_Protected))
-#define buffer_busy(bh) ((bh)->b_count || ((bh)->b_state & BUFFER_BUSY_BITS))
-
/*
* try_to_free_buffers() checks if all the buffers on this particular page
* are unused, and free's the page if so.
* Wake up bdflush() if this fails - if we're running low on memory due
* to dirty buffers, we need to flush them out as quickly as possible.
*/
-int try_to_free_buffers(struct page * page_map)
+int try_to_free_buffers(struct page * page)
{
- struct buffer_head * tmp, * bh = page_map->buffers;
+ struct buffer_head * tmp, * bh = page->buffers;
tmp = bh;
do {
/* And free the page */
buffermem -= PAGE_SIZE;
- page_map->buffers = NULL;
- __free_page(page_map);
+ page->buffers = NULL;
+ __free_page(page);
return 1;
}
the heuristic from working with large databases and getting
fsync times (ext2) manageable, is the following */
- memory_size >>= 20;
+ memory_size >>= 22;
for (order = 5; (1UL << order) < memory_size; order++);
/* try to allocate something until we get it or we're asking
- for something that is really too small */
+ for something that is really too small */
do {
nr_hash = (1UL << order) * PAGE_SIZE /
hash_table = (struct buffer_head **)
__get_free_pages(GFP_ATOMIC, order);
} while (hash_table == NULL && --order > 4);
+ printk("buffer-cache hash table entries: %d (order: %d, %ld bytes)\n", nr_hash, order, (1UL<<order) * PAGE_SIZE);
if (!hash_table)
panic("Failed to allocate buffer hash table\n");
{
if (current == bdflush_tsk)
return;
- wake_up(&bdflush_wait);
- if (wait) {
+ if (wait)
run_task_queue(&tq_disk);
+ wake_up(&bdflush_wait);
+ if (wait)
sleep_on(&bdflush_done);
- }
}
#endif
bh->b_count--;
next->b_count--;
+ wake_up(&buffer_wait);
}
}
#ifdef DEBUG
* bitmap, and then for any free bit if that fails.
*/
int ext2_new_block (const struct inode * inode, unsigned long goal,
- u32 * prealloc_count, u32 * prealloc_block, int * err)
+ u32 * prealloc_count, u32 * prealloc_block, int * err)
{
struct buffer_head * bh;
struct buffer_head * bh2;
if (j >= le32_to_cpu(es->s_blocks_count)) {
ext2_error (sb, "ext2_new_block",
- "block >= blocks count - "
- "block_group = %d, block=%d", i, j);
+ "block(%d) >= blocks count(%d) - "
+ "block_group = %d, es == %p ",j,
+ le32_to_cpu(es->s_blocks_count), i, es);
unlock_super (sb);
return 0;
}
- if (!(bh = getblk (sb->s_dev, j, sb->s_blocksize))) {
- ext2_error (sb, "ext2_new_block", "cannot get block %d", j);
- unlock_super (sb);
- return 0;
- }
- memset(bh->b_data, 0, sb->s_blocksize);
- mark_buffer_uptodate(bh, 1);
- mark_buffer_dirty(bh, 1);
- brelse (bh);
ext2_debug ("allocating block %d. "
"Goal hits %d of %d.\n", j, goal_hits, goal_attempts);
#include <linux/locks.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
+#include <linux/smp_lock.h>
#define NBUF 32
#define MIN(a,b) (((a)<(b))?(a):(b))
#define MAX(a,b) (((a)>(b))?(a):(b))
+static int ext2_writepage (struct file * file, struct page * page);
static long long ext2_file_lseek(struct file *, long long, int);
-static ssize_t ext2_file_write (struct file *, const char *, size_t, loff_t *);
-static int ext2_release_file (struct inode *, struct file *);
#if BITS_PER_LONG < 64
static int ext2_open_file (struct inode *, struct file *);
#endif
-/*
- * We have mostly NULL's here: the current defaults are ok for
- * the ext2 filesystem.
- */
-static struct file_operations ext2_file_operations = {
- ext2_file_lseek, /* lseek */
- generic_file_read, /* read */
- ext2_file_write, /* write */
- NULL, /* readdir - bad */
- NULL, /* poll - default */
- ext2_ioctl, /* ioctl */
- generic_file_mmap, /* mmap */
-#if BITS_PER_LONG == 64
- NULL, /* no special open is needed */
-#else
- ext2_open_file,
-#endif
- NULL, /* flush */
- ext2_release_file, /* release */
- ext2_sync_file, /* fsync */
- NULL, /* fasync */
- NULL, /* check_media_change */
- NULL /* revalidate */
-};
-
-struct inode_operations ext2_file_inode_operations = {
- &ext2_file_operations,/* default file operations */
- NULL, /* create */
- NULL, /* lookup */
- NULL, /* link */
- NULL, /* unlink */
- NULL, /* symlink */
- NULL, /* mkdir */
- NULL, /* rmdir */
- NULL, /* mknod */
- NULL, /* rename */
- NULL, /* readlink */
- NULL, /* follow_link */
- generic_readpage, /* readpage */
- NULL, /* writepage */
- ext2_bmap, /* bmap */
- ext2_truncate, /* truncate */
- ext2_permission, /* permission */
- NULL /* smap */
-};
/*
* Make sure the offset never goes beyond the 32-bit mark..
}
}
-static ssize_t ext2_file_write (struct file * filp, const char * buf,
- size_t count, loff_t *ppos)
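+/* write out one whole page: map (or allocate) the on-disk block for
+ * each buffer on the page, then start block I/O with brw_page(WRITE)
+ */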
+static int ext2_writepage (struct file * file, struct page * page)
{
- struct inode * inode = filp->f_dentry->d_inode;
- off_t pos;
- long block;
- int offset;
- int written, c;
- struct buffer_head * bh, *bufferlist[NBUF];
- struct super_block * sb;
- int err;
- int i,buffercount,write_error;
-
- /* POSIX: mtime/ctime may not change for 0 count */
- if (!count)
- return 0;
- write_error = buffercount = 0;
- if (!inode) {
- printk("ext2_file_write: inode = NULL\n");
- return -EINVAL;
- }
- sb = inode->i_sb;
- if (sb->s_flags & MS_RDONLY)
- /*
- * This fs has been automatically remounted ro because of errors
- */
- return -ENOSPC;
-
- if (!S_ISREG(inode->i_mode)) {
- ext2_warning (sb, "ext2_file_write", "mode = %07o",
- inode->i_mode);
- return -EINVAL;
- }
- remove_suid(inode);
-
- if (filp->f_flags & O_APPEND)
- pos = inode->i_size;
- else {
- pos = *ppos;
- if (pos != *ppos)
- return -EINVAL;
-#if BITS_PER_LONG >= 64
- if (pos > ext2_max_sizes[EXT2_BLOCK_SIZE_BITS(sb)])
- return -EINVAL;
-#endif
- }
-
- /* Check for overflow.. */
-#if BITS_PER_LONG < 64
- if (pos > (__u32) (pos + count)) {
- count = ~pos; /* == 0xFFFFFFFF - pos */
- if (!count)
- return -EFBIG;
- }
-#else
- {
- off_t max = ext2_max_sizes[EXT2_BLOCK_SIZE_BITS(sb)];
-
- if (pos + count > max) {
- count = max - pos;
- if (!count)
- return -EFBIG;
- }
- if (((pos + count) >> 32) &&
- !(sb->u.ext2_sb.s_es->s_feature_ro_compat &
- cpu_to_le32(EXT2_FEATURE_RO_COMPAT_LARGE_FILE))) {
- /* If this is the first large file created, add a flag
- to the superblock */
- sb->u.ext2_sb.s_es->s_feature_ro_compat |=
- cpu_to_le32(EXT2_FEATURE_RO_COMPAT_LARGE_FILE);
- mark_buffer_dirty(sb->u.ext2_sb.s_sbh, 1);
- }
- }
-#endif
-
- /*
- * If a file has been opened in synchronous mode, we have to ensure
- * that meta-data will also be written synchronously. Thus, we
- * set the i_osync field. This field is tested by the allocation
- * routines.
- */
- if (filp->f_flags & O_SYNC)
- inode->u.ext2_i.i_osync++;
- block = pos >> EXT2_BLOCK_SIZE_BITS(sb);
- offset = pos & (sb->s_blocksize - 1);
- c = sb->s_blocksize - offset;
- written = 0;
+ struct dentry *dentry = file->f_dentry;
+ struct inode *inode = dentry->d_inode;
+ unsigned long block;
+ int *p, nr[PAGE_SIZE/512];
+ int i, err, created;
+ struct buffer_head *bh;
+
+ i = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
+ block = page->offset >> inode->i_sb->s_blocksize_bits;
+ p = nr;
+ bh = page->buffers;
do {
- bh = ext2_getblk (inode, block, 1, &err);
- if (!bh) {
- if (!written)
- written = err;
- break;
- }
- if (c > count)
- c = count;
- if (c != sb->s_blocksize && !buffer_uptodate(bh)) {
- ll_rw_block (READ, 1, &bh);
- wait_on_buffer (bh);
- if (!buffer_uptodate(bh)) {
- brelse (bh);
- if (!written)
- written = -EIO;
- break;
- }
- }
- c -= copy_from_user (bh->b_data + offset, buf, c);
- if (!c) {
- brelse(bh);
- if (!written)
- written = -EFAULT;
- break;
- }
- update_vm_cache(inode, pos, bh->b_data + offset, c);
- pos += c;
- written += c;
- buf += c;
- count -= c;
- mark_buffer_uptodate(bh, 1);
- mark_buffer_dirty(bh, 0);
-
- if (filp->f_flags & O_SYNC)
- bufferlist[buffercount++] = bh;
+ if (bh && bh->b_blocknr)
+ *p = bh->b_blocknr;
else
- brelse(bh);
- if (buffercount == NBUF){
- ll_rw_block(WRITE, buffercount, bufferlist);
- for(i=0; i<buffercount; i++){
- wait_on_buffer(bufferlist[i]);
- if (!buffer_uptodate(bufferlist[i]))
- write_error=1;
- brelse(bufferlist[i]);
- }
- buffercount=0;
- }
- if(write_error)
- break;
+ *p = ext2_getblk_block (inode, block, 1, &err, &created);
+ if (!*p)
+ return -EIO;
+ i--;
block++;
- offset = 0;
- c = sb->s_blocksize;
- } while (count);
- if ( buffercount ){
- ll_rw_block(WRITE, buffercount, bufferlist);
- for(i=0; i<buffercount; i++){
- wait_on_buffer(bufferlist[i]);
- if (!buffer_uptodate(bufferlist[i]))
- write_error=1;
- brelse(bufferlist[i]);
- }
- }
- if (pos > inode->i_size)
- inode->i_size = pos;
- if (filp->f_flags & O_SYNC)
- inode->u.ext2_i.i_osync--;
- inode->i_ctime = inode->i_mtime = CURRENT_TIME;
- *ppos = pos;
- mark_inode_dirty(inode);
- return written;
+ p++;
+ if (bh)
+ bh = bh->b_this_page;
+ } while (i > 0);
+
+ /* IO start */
+ brw_page(WRITE, page, inode->i_dev, nr, inode->i_sb->s_blocksize, 1);
+ return 0;
+}
+
+static long ext2_write_one_page (struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf)
+{
+ return block_write_one_page(file, page, offset, bytes, buf, ext2_getblk_block);
+}
+
+/*
+ * Write to a file (through the page cache).
+ */
+static ssize_t
+ext2_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
+{
+ return generic_file_write(file, buf, count, ppos, ext2_write_one_page);
}
/*
return 0;
}
#endif
+
+/*
+ * We have mostly NULL's here: the current defaults are ok for
+ * the ext2 filesystem.
+ */
+static struct file_operations ext2_file_operations = {
+ ext2_file_lseek, /* lseek */
+ generic_file_read, /* read */
+ ext2_file_write, /* write */
+ NULL, /* readdir - bad */
+ NULL, /* poll - default */
+ ext2_ioctl, /* ioctl */
+ generic_file_mmap, /* mmap */
+#if BITS_PER_LONG == 64
+ NULL, /* no special open is needed */
+#else
+ ext2_open_file,
+#endif
+ NULL, /* flush */
+ ext2_release_file, /* release */
+ ext2_sync_file, /* fsync */
+ NULL, /* fasync */
+ NULL, /* check_media_change */
+ NULL /* revalidate */
+};
+
+struct inode_operations ext2_file_inode_operations = {
+ &ext2_file_operations,/* default file operations */
+ NULL, /* create */
+ NULL, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ generic_readpage, /* readpage */
+ ext2_writepage, /* writepage */
+ ext2_bmap, /* bmap */
+ ext2_truncate, /* truncate */
+ ext2_permission, /* permission */
+ NULL, /* smap */
+ NULL, /* updatepage */
+ NULL, /* revalidate */
+ generic_block_flushpage,/* flushpage */
+};
#endif
}
-static int ext2_alloc_block (struct inode * inode, unsigned long goal, int * err)
+static int ext2_alloc_block (struct inode * inode, unsigned long goal, int *err)
{
#ifdef EXT2FS_DEBUG
static unsigned long alloc_hits = 0, alloc_attempts = 0;
#endif
unsigned long result;
- struct buffer_head * bh;
wait_on_super (inode->i_sb);
ext2_debug ("preallocation hit (%lu/%lu).\n",
++alloc_hits, ++alloc_attempts);
- /* It doesn't matter if we block in getblk() since
- we have already atomically allocated the block, and
- are only clearing it now. */
- if (!(bh = getblk (inode->i_sb->s_dev, result,
- inode->i_sb->s_blocksize))) {
- ext2_error (inode->i_sb, "ext2_alloc_block",
- "cannot get block %lu", result);
- return 0;
- }
- memset(bh->b_data, 0, inode->i_sb->s_blocksize);
- mark_buffer_uptodate(bh, 1);
- mark_buffer_dirty(bh, 1);
- brelse (bh);
} else {
ext2_discard_prealloc (inode);
ext2_debug ("preallocation miss (%lu/%lu).\n",
#else
result = ext2_new_block (inode, goal, 0, 0, err);
#endif
-
return result;
}
block & (addr_per_block - 1));
}
+int ext2_bmap_create (struct inode * inode, int block)
+{
+ int i;
+ int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
+ int addr_per_block_bits = EXT2_ADDR_PER_BLOCK_BITS(inode->i_sb);
+
+ if (block < 0) {
+		ext2_warning (inode->i_sb, "ext2_bmap_create", "block < 0");
+ return 0;
+ }
+ if (block >= EXT2_NDIR_BLOCKS + addr_per_block +
+ (1 << (addr_per_block_bits * 2)) +
+ ((1 << (addr_per_block_bits * 2)) << addr_per_block_bits)) {
+		ext2_warning (inode->i_sb, "ext2_bmap_create", "block > big");
+ return 0;
+ }
+ if (block < EXT2_NDIR_BLOCKS)
+ return inode_bmap (inode, block);
+ block -= EXT2_NDIR_BLOCKS;
+ if (block < addr_per_block) {
+ i = inode_bmap (inode, EXT2_IND_BLOCK);
+ if (!i)
+ return 0;
+ return block_bmap (bread (inode->i_dev, i,
+ inode->i_sb->s_blocksize), block);
+ }
+ block -= addr_per_block;
+ if (block < (1 << (addr_per_block_bits * 2))) {
+ i = inode_bmap (inode, EXT2_DIND_BLOCK);
+ if (!i)
+ return 0;
+ i = block_bmap (bread (inode->i_dev, i,
+ inode->i_sb->s_blocksize),
+ block >> addr_per_block_bits);
+ if (!i)
+ return 0;
+ return block_bmap (bread (inode->i_dev, i,
+ inode->i_sb->s_blocksize),
+ block & (addr_per_block - 1));
+ }
+ block -= (1 << (addr_per_block_bits * 2));
+ i = inode_bmap (inode, EXT2_TIND_BLOCK);
+ if (!i)
+ return 0;
+ i = block_bmap (bread (inode->i_dev, i, inode->i_sb->s_blocksize),
+ block >> (addr_per_block_bits * 2));
+ if (!i)
+ return 0;
+ i = block_bmap (bread (inode->i_dev, i, inode->i_sb->s_blocksize),
+ (block >> addr_per_block_bits) & (addr_per_block - 1));
+ if (!i)
+ return 0;
+ return block_bmap (bread (inode->i_dev, i, inode->i_sb->s_blocksize),
+ block & (addr_per_block - 1));
+}
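+
+/*
+ * Worked example (illustrative, for 1K blocks): EXT2_ADDR_PER_BLOCK
+ * is 1024 / sizeof(u32) = 256 and EXT2_NDIR_BLOCKS is 12, so the
+ * ranges handled above are:
+ *
+ *	blocks 0 .. 11		direct
+ *	blocks 12 .. 267	one indirection (12 + 256)
+ *	blocks 268 .. 65803	double indirection (+ 256^2)
+ *	blocks 65804 and up	triple indirection (+ 256^3)
+ */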
+
static struct buffer_head * inode_getblk (struct inode * inode, int nr,
- int create, int new_block, int * err)
+ int create, int new_block, int * err, int metadata,
+ int *phys_block, int *created)
{
u32 * p;
int tmp, goal = 0;
repeat:
tmp = *p;
if (tmp) {
- struct buffer_head * result = getblk (inode->i_dev, tmp, inode->i_sb->s_blocksize);
- if (tmp == *p)
- return result;
- brelse (result);
- goto repeat;
+ if (metadata) {
+ struct buffer_head * result = getblk (inode->i_dev, tmp, inode->i_sb->s_blocksize);
+ if (tmp == *p)
+ return result;
+ brelse (result);
+ goto repeat;
+ } else {
+ *phys_block = tmp;
+ return NULL;
+ }
}
*err = -EFBIG;
if (!create)
tmp = ext2_alloc_block (inode, goal, err);
if (!tmp)
return NULL;
- result = getblk (inode->i_dev, tmp, inode->i_sb->s_blocksize);
- if (*p) {
- ext2_free_blocks (inode, tmp, 1);
- brelse (result);
- goto repeat;
+ if (metadata) {
+ result = getblk (inode->i_dev, tmp, inode->i_sb->s_blocksize);
+ if (*p) {
+ ext2_free_blocks (inode, tmp, 1);
+ brelse (result);
+ goto repeat;
+ }
+ memset(result->b_data, 0, inode->i_sb->s_blocksize);
+ mark_buffer_uptodate(result, 1);
+ mark_buffer_dirty(result, 1);
+ } else {
+ if (*p) {
+ ext2_free_blocks (inode, tmp, 1);
+ goto repeat;
+ }
+ *phys_block = tmp;
+ result = NULL;
+ *err = 0;
+ *created = 1;
}
*p = tmp;
+
inode->u.ext2_i.i_next_alloc_block = new_block;
inode->u.ext2_i.i_next_alloc_goal = tmp;
inode->i_ctime = CURRENT_TIME;
return result;
}
+/*
+ * metadata / data
+ * possibly create / access
+ * can fail due to: - not present
+ * - out of space
+ *
+ * NULL return in the data case is mandatory.
+ */
static struct buffer_head * block_getblk (struct inode * inode,
- struct buffer_head * bh, int nr,
- int create, int blocksize,
- int new_block, int * err)
+ struct buffer_head * bh, int nr, int create, int blocksize,
+ int new_block, int * err, int metadata, int *phys_block, int *created)
{
int tmp, goal = 0;
u32 * p;
repeat:
tmp = le32_to_cpu(*p);
if (tmp) {
- result = getblk (bh->b_dev, tmp, blocksize);
- if (tmp == le32_to_cpu(*p)) {
+ if (metadata) {
+ result = getblk (bh->b_dev, tmp, blocksize);
+ if (tmp == le32_to_cpu(*p)) {
+ brelse (bh);
+ return result;
+ }
+ brelse (result);
+ goto repeat;
+ } else {
+ *phys_block = tmp;
brelse (bh);
- return result;
+ return NULL;
}
- brelse (result);
- goto repeat;
}
*err = -EFBIG;
if (!create) {
brelse (bh);
return NULL;
}
- result = getblk (bh->b_dev, tmp, blocksize);
+ if (metadata) {
+ result = getblk (bh->b_dev, tmp, blocksize);
+ if (*p) {
+ ext2_free_blocks (inode, tmp, 1);
+ brelse (result);
+ goto repeat;
+ }
+ memset(result->b_data, 0, inode->i_sb->s_blocksize);
+ mark_buffer_uptodate(result, 1);
+ mark_buffer_dirty(result, 1);
+ } else {
+ *phys_block = tmp;
+ result = NULL;
+ *err = 0;
+ *created = 1;
+ }
if (le32_to_cpu(*p)) {
ext2_free_blocks (inode, tmp, 1);
brelse (result);
return result;
}
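+
+/*
+ * Illustrative call patterns (err, phys_block and created stand for
+ * the caller's locals, as in ext2_getblk_block() below):
+ *
+ *	metadata: bh = block_getblk(inode, bh, nr, create, blocksize,
+ *				    b, &err, 1, NULL, NULL);
+ *	data:	  block_getblk(inode, bh, nr, create, blocksize,
+ *			       b, &err, 0, &phys_block, &created);
+ *
+ * In the data case the return value is always NULL and the mapping
+ * comes back through *phys_block.
+ */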
-struct buffer_head * ext2_getblk (struct inode * inode, long block,
- int create, int * err)
+int ext2_getblk_block (struct inode * inode, long block,
+ int create, int * err, int * created)
{
- struct buffer_head * bh;
+ struct buffer_head * bh, *tmp;
unsigned long b;
unsigned long addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
int addr_per_block_bits = EXT2_ADDR_PER_BLOCK_BITS(inode->i_sb);
+	int phys_block = 0;
*err = -EIO;
if (block < 0) {
ext2_warning (inode->i_sb, "ext2_getblk", "block < 0");
- return NULL;
+ return 0;
}
if (block > EXT2_NDIR_BLOCKS + addr_per_block +
(1 << (addr_per_block_bits * 2)) +
((1 << (addr_per_block_bits * 2)) << addr_per_block_bits)) {
ext2_warning (inode->i_sb, "ext2_getblk", "block > big");
- return NULL;
+ return 0;
}
/*
* If this is a sequential block allocation, set the next_alloc_block
inode->u.ext2_i.i_next_alloc_goal++;
}
- *err = -ENOSPC;
+	*err = 0;	/* was -ENOSPC */
b = block;
- if (block < EXT2_NDIR_BLOCKS)
- return inode_getblk (inode, block, create, b, err);
+ *created = 0;
+ if (block < EXT2_NDIR_BLOCKS) {
+ /*
+ * data page.
+ */
+ tmp = inode_getblk (inode, block, create, b,
+ err, 0, &phys_block, created);
+ goto out;
+ }
block -= EXT2_NDIR_BLOCKS;
if (block < addr_per_block) {
- bh = inode_getblk (inode, EXT2_IND_BLOCK, create, b, err);
- return block_getblk (inode, bh, block, create,
- inode->i_sb->s_blocksize, b, err);
+ bh = inode_getblk (inode, EXT2_IND_BLOCK, create, b, err, 1, NULL, NULL);
+ tmp = block_getblk (inode, bh, block, create,
+ inode->i_sb->s_blocksize, b, err, 0, &phys_block, created);
+ goto out;
}
block -= addr_per_block;
if (block < (1 << (addr_per_block_bits * 2))) {
- bh = inode_getblk (inode, EXT2_DIND_BLOCK, create, b, err);
+ bh = inode_getblk (inode, EXT2_DIND_BLOCK, create, b, err, 1, NULL, NULL);
bh = block_getblk (inode, bh, block >> addr_per_block_bits,
- create, inode->i_sb->s_blocksize, b, err);
- return block_getblk (inode, bh, block & (addr_per_block - 1),
- create, inode->i_sb->s_blocksize, b, err);
+ create, inode->i_sb->s_blocksize, b, err, 1, NULL, NULL);
+ tmp = block_getblk (inode, bh, block & (addr_per_block - 1),
+ create, inode->i_sb->s_blocksize, b, err, 0, &phys_block, created);
+ goto out;
}
block -= (1 << (addr_per_block_bits * 2));
- bh = inode_getblk (inode, EXT2_TIND_BLOCK, create, b, err);
+ bh = inode_getblk (inode, EXT2_TIND_BLOCK, create, b, err, 1, NULL,NULL);
bh = block_getblk (inode, bh, block >> (addr_per_block_bits * 2),
- create, inode->i_sb->s_blocksize, b, err);
- bh = block_getblk (inode, bh, (block >> addr_per_block_bits) & (addr_per_block - 1),
- create, inode->i_sb->s_blocksize, b, err);
- return block_getblk (inode, bh, block & (addr_per_block - 1), create,
- inode->i_sb->s_blocksize, b, err);
+ create, inode->i_sb->s_blocksize, b, err, 1, NULL,NULL);
+ bh = block_getblk (inode, bh, (block >> addr_per_block_bits) &
+ (addr_per_block - 1), create, inode->i_sb->s_blocksize,
+ b, err, 1, NULL,NULL);
+ tmp = block_getblk (inode, bh, block & (addr_per_block - 1), create,
+ inode->i_sb->s_blocksize, b, err, 0, &phys_block, created);
+
+out:
+ if (!phys_block) {
+ return 0;
+ }
+ if (*err) {
+ return 0;
+ }
+ return phys_block;
+}
+
+struct buffer_head * ext2_getblk (struct inode * inode, long block,
+ int create, int * err)
+{
+ struct buffer_head *tmp = NULL;
+ int phys_block;
+ int created;
+
+ phys_block = ext2_getblk_block (inode, block, create, err, &created);
+
+ if (phys_block) {
+ tmp = getblk (inode->i_dev, phys_block, inode->i_sb->s_blocksize);
+ if (created) {
+ memset(tmp->b_data, 0, inode->i_sb->s_blocksize);
+ mark_buffer_uptodate(tmp, 1);
+ mark_buffer_dirty(tmp, 1);
+ }
+ }
+ return tmp;
}
struct buffer_head * ext2_bread (struct inode * inode, int block,
return retry;
}
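+
+/*
+ * The truncate paths below take their own reference (bh->b_count++)
+ * before testing, so a count above 1 means the buffer is still in use
+ * elsewhere.
+ */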
+#define DATA_BUFFER_USED(bh) \
+ ((bh->b_count > 1) || buffer_locked(bh))
+
static int trunc_direct (struct inode * inode)
{
struct buffer_head * bh;
bh = find_buffer(inode->i_dev, tmp, inode->i_sb->s_blocksize);
if (bh) {
bh->b_count++;
- if(bh->b_count != 1 || buffer_locked(bh)) {
+ if (DATA_BUFFER_USED(bh)) {
brelse(bh);
retry = 1;
continue;
bh = find_buffer(inode->i_dev, tmp, inode->i_sb->s_blocksize);
if (bh) {
bh->b_count++;
- if (bh->b_count != 1 || buffer_locked(bh)) {
- brelse (bh);
+ if (DATA_BUFFER_USED(bh)) {
+ brelse(bh);
retry = 1;
continue;
}
void ext2_truncate (struct inode * inode)
{
- int err, offset;
-
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode)))
return;
current->policy |= SCHED_YIELD;
schedule();
}
- /*
- * If the file is not being truncated to a block boundary, the
- * contents of the partial block following the end of the file
- * must be zeroed in case it ever becomes accessible again due
- * to subsequent file growth.
- */
- offset = inode->i_size & (inode->i_sb->s_blocksize - 1);
- if (offset) {
- struct buffer_head * bh;
- bh = ext2_bread (inode,
- inode->i_size >> EXT2_BLOCK_SIZE_BITS(inode->i_sb),
- 0, &err);
- if (bh) {
- memset (bh->b_data + offset, 0,
- inode->i_sb->s_blocksize - offset);
- mark_buffer_dirty (bh, 0);
- brelse (bh);
- }
- }
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(inode);
}
* linux/fs/fifo.c
*
* written by Paul H. Hargrove
+ *
+ * Fixes:
+ * 10-06-1999, AV: fixed OOM handling in fifo_open(), moved
+ * initialization there, switched to external
+ * allocation of pipe_inode_info.
*/
#include <linux/mm.h>
+#include <linux/malloc.h>
static int fifo_open(struct inode * inode,struct file * filp)
{
int retval = 0;
- unsigned long page;
+ unsigned long page = 0;
+ struct pipe_inode_info *info, *tmp = NULL;
+
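+	/*
+	 * kmalloc() and __get_free_page() may block, so recheck
+	 * inode->i_pipe after each allocation in case another opener
+	 * completed the setup while we slept; anything we allocated
+	 * but did not use is freed at the "out" label below.
+	 */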
+ if (inode->i_pipe)
+ goto got_it;
+ tmp = kmalloc(sizeof(struct pipe_inode_info),GFP_KERNEL);
+ if (inode->i_pipe)
+ goto got_it;
+ if (!tmp)
+ goto oom;
+ page = __get_free_page(GFP_KERNEL);
+ if (inode->i_pipe)
+ goto got_it;
+ if (!page)
+ goto oom;
+ inode->i_pipe = tmp;
+ PIPE_LOCK(*inode) = 0;
+ PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
+ PIPE_BASE(*inode) = (char *) page;
+ PIPE_RD_OPENERS(*inode) = PIPE_WR_OPENERS(*inode) = 0;
+ PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 0;
+ init_waitqueue_head(&PIPE_WAIT(*inode));
+ tmp = NULL; /* no need to free it */
+ page = 0;
+
+got_it:
switch( filp->f_mode ) {
default:
retval = -EINVAL;
}
- if (retval || PIPE_BASE(*inode))
- return retval;
- page = __get_free_page(GFP_KERNEL);
- if (PIPE_BASE(*inode)) {
+ if (retval)
+ goto cleanup;
+out:
+ if (tmp)
+ kfree(tmp);
+ if (page)
free_page(page);
- return 0;
+ return retval;
+
+cleanup:
+ if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
+ info = inode->i_pipe;
+ inode->i_pipe = NULL;
+ free_page((unsigned long)info->base);
+ kfree(info);
}
- if (!page)
- return -ENOMEM;
- PIPE_LOCK(*inode) = 0;
- PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
- PIPE_BASE(*inode) = (char *) page;
- return 0;
+ goto out;
+oom:
+ retval = -ENOMEM;
+ goto out;
}
/*
NULL /* permission */
};
+
+/* Goner. Filesystems do not use it anymore. */
+
void init_fifo(struct inode * inode)
{
inode->i_op = &fifo_inode_operations;
- PIPE_LOCK(*inode) = 0;
- PIPE_BASE(*inode) = NULL;
- PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
- PIPE_RD_OPENERS(*inode) = PIPE_WR_OPENERS(*inode) = 0;
- init_waitqueue_head(&PIPE_WAIT(*inode));
- PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 0;
}
inode->i_generation = 0;
memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
sema_init(&inode->i_sem, 1);
+ inode->i_pipe = NULL;
}
/*
*/
static int refetch_to_readdir_off(struct file *file, struct inode *inode, u32 off)
{
+ struct page *page;
u32 cur_off, goal_off = off & PAGE_MASK;
again:
cur_off = 0;
while (cur_off < goal_off) {
- struct page *page;
-
- page = find_page(inode, cur_off);
+ page = find_get_page(inode, cur_off);
if (page) {
- if (PageLocked(page))
- __wait_on_page(page);
- if (!PageUptodate(page))
- return -1;
+ if (!Page_Uptodate(page))
+ goto out_error;
} else {
page = try_to_get_dirent_page(file, cur_off, 0);
if (!page) {
if (!cur_off)
- return -1;
+ goto out_error;
/* Someone touched the dir on us. */
goto again;
}
- page_cache_release(page);
}
+ page_cache_release(page);
cur_off += PAGE_SIZE;
}
-
return 0;
+
+out_error:
+ if (page)
+ page_cache_release(page);
+ return -1;
}
static struct page *try_to_get_dirent_page(struct file *file, unsigned long offset, int refetch_ok)
}
hash = page_hash(inode, offset);
- page = __find_page(inode, offset, *hash);
+repeat:
+ page = __find_lock_page(inode, offset, *hash);
if (page) {
page_cache_free(page_cache);
- goto out;
+ goto unlock_out;
}
page = page_cache_entry(page_cache);
- atomic_inc(&page->count);
- page->flags = ((page->flags &
- ~((1 << PG_uptodate) | (1 << PG_error))) |
- ((1 << PG_referenced) | (1 << PG_locked)));
- page->offset = offset;
- add_page_to_inode_queue(inode, page);
- __add_page_to_hash_queue(page, hash);
+ if (add_to_page_cache_unique(page, inode, offset, hash)) {
+ page_cache_release(page);
+ goto repeat;
+ }
rd_args.fh = NFS_FH(dentry);
rd_res.buffer = (char *)page_cache;
else if (create_cookie(rd_res.cookie, offset, inode))
goto error;
- set_bit(PG_uptodate, &page->flags);
+ SetPageUptodate(page);
unlock_out:
- clear_bit(PG_locked, &page->flags);
- wake_up(&page->wait);
+ UnlockPage(page);
out:
return page;
error:
- set_bit(PG_error, &page->flags);
+ SetPageError(page);
goto unlock_out;
}
offset = filp->f_pos >> PAGE_CACHE_SHIFT;
hash = page_hash(inode, offset);
- page = __find_page(inode, offset, *hash);
+ page = __find_get_page(inode, offset, *hash);
if (!page)
goto no_dirent_page;
- if (PageLocked(page))
- goto dirent_locked_wait;
- if (!PageUptodate(page))
+ if (!Page_Uptodate(page))
goto dirent_read_error;
success:
filp->f_pos = nfs_do_filldir((__u32 *) page_address(page),
if (!page)
goto no_page;
-dirent_locked_wait:
- wait_on_page(page);
- if (PageUptodate(page))
+ if (Page_Uptodate(page))
goto success;
dirent_read_error:
page_cache_release(page);
#include <linux/malloc.h>
#include <linux/pagemap.h>
#include <linux/lockd/bind.h>
+#include <linux/smp_lock.h>
#include <asm/uaccess.h>
#include <asm/segment.h>
NULL, /* smap */
NULL, /* updatepage */
nfs_revalidate, /* revalidate */
+ NULL, /* flushpage */
};
/* Hack for future NFS swap support */
bytes -= copy_from_user((u8*)page_address(page) + offset, buf, bytes);
status = -EFAULT;
- if (bytes)
+ if (bytes) {
+ lock_kernel();
status = nfs_updatepage(file, page, offset, bytes);
+ unlock_kernel();
+ }
return status;
}
int flags = IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0;
dprintk("NFS: nfs_readpage_sync(%p)\n", page);
- clear_bit(PG_error, &page->flags);
do {
if (count < rsize)
} while (count);
memset(buffer, 0, count);
- set_bit(PG_uptodate, &page->flags);
+ SetPageUptodate(page);
result = 0;
io_error:
+ UnlockPage(page);
/* Note: we don't refresh if the call returned error */
if (refresh && result >= 0)
nfs_refresh_inode(inode, &rqst.ra_fattr);
- /* N.B. Use nfs_unlock_page here? */
- clear_bit(PG_locked, &page->flags);
- wake_up(&page->wait);
return result;
}
memset((char *) address + result, 0, PAGE_SIZE - result);
}
nfs_refresh_inode(req->ra_inode, &req->ra_fattr);
- set_bit(PG_uptodate, &page->flags);
+ SetPageUptodate(page);
succ++;
} else {
- set_bit(PG_error, &page->flags);
+ SetPageError(page);
fail++;
dprintk("NFS: %d successful reads, %d failures\n", succ, fail);
}
- /* N.B. Use nfs_unlock_page here? */
- clear_bit(PG_locked, &page->flags);
- wake_up(&page->wait);
-
+	page->owner = (int)current;	/* HACK, FIXME, will go away. */
+ UnlockPage(page);
free_page(address);
rpc_release_task(task);
dprintk("NFS: nfs_readpage (%p %ld@%ld)\n",
page, PAGE_SIZE, page->offset);
- atomic_inc(&page->count);
- set_bit(PG_locked, &page->flags);
+ get_page(page);
/*
* Try to flush any pending writes to the file..
goto out_free;
out_error:
- clear_bit(PG_locked, &page->flags);
- wake_up(&page->wait);
+ UnlockPage(page);
out_free:
free_page(page_address(page));
out:
goto out;
hash = page_hash(inode, 0);
- page = __find_page(inode, 0, *hash);
+repeat:
+ page = __find_lock_page(inode, 0, *hash);
if (page) {
page_cache_free(page_cache);
- goto out;
+ goto unlock_out;
}
page = page_cache_entry(page_cache);
- atomic_inc(&page->count);
- page->flags = ((page->flags &
- ~((1 << PG_uptodate) | (1 << PG_error))) |
- ((1 << PG_referenced) | (1 << PG_locked)));
- page->offset = 0;
- add_page_to_inode_queue(inode, page);
- __add_page_to_hash_queue(page, hash);
+ if (add_to_page_cache_unique(page, inode, 0, hash)) {
+ page_cache_release(page);
+ goto repeat;
+ }
/* We place the length at the beginning of the page,
* in host byte order, followed by the string. The
if (rpc_call(NFS_CLIENT(inode), NFSPROC_READLINK,
&rl_args, NULL, 0) < 0)
goto error;
- set_bit(PG_uptodate, &page->flags);
+ SetPageUptodate(page);
unlock_out:
- clear_bit(PG_locked, &page->flags);
- wake_up(&page->wait);
+ UnlockPage(page);
out:
return page;
error:
- set_bit(PG_error, &page->flags);
+ SetPageError(page);
goto unlock_out;
}
static int nfs_readlink(struct dentry *dentry, char *buffer, int buflen)
{
struct inode *inode = dentry->d_inode;
- struct page *page, **hash;
+ struct page *page;
u32 *p, len;
/* Caller revalidated the directory inode already. */
- hash = page_hash(inode, 0);
- page = __find_page(inode, 0, *hash);
+ page = find_get_page(inode, 0);
if (!page)
goto no_readlink_page;
- if (PageLocked(page))
- goto readlink_locked_wait;
- if (!PageUptodate(page))
+ if (!Page_Uptodate(page))
goto readlink_read_error;
success:
p = (u32 *) page_address(page);
page = try_to_get_symlink_page(dentry, inode);
if (!page)
goto no_page;
-readlink_locked_wait:
- wait_on_page(page);
- if (PageUptodate(page))
+ if (Page_Uptodate(page))
goto success;
readlink_read_error:
page_cache_release(page);
{
struct dentry *result;
struct inode *inode = dentry->d_inode;
- struct page *page, **hash;
+ struct page *page;
u32 *p;
/* Caller revalidated the directory inode already. */
- hash = page_hash(inode, 0);
- page = __find_page(inode, 0, *hash);
+ page = find_get_page(inode, 0);
if (!page)
goto no_followlink_page;
- if (PageLocked(page))
- goto followlink_locked_wait;
- if (!PageUptodate(page))
+ if (!Page_Uptodate(page))
goto followlink_read_error;
success:
p = (u32 *) page_address(page);
page = try_to_get_symlink_page(dentry, inode);
if (!page)
goto no_page;
-followlink_locked_wait:
- wait_on_page(page);
- if (PageUptodate(page))
+ if (Page_Uptodate(page))
goto success;
followlink_read_error:
page_cache_release(page);
if (result < 0) {
/* Must mark the page invalid after I/O error */
- clear_bit(PG_uptodate, &page->flags);
+ ClearPageUptodate(page);
goto io_error;
}
if (result != wsize)
* Ok, there's another user of this page with the new request..
* The IO completion will then free the page and the dentry.
*/
- atomic_inc(&page->count);
+ get_page(page);
file->f_count++;
/* Schedule request */
updated:
if (req->wb_bytes == PAGE_SIZE)
- set_bit(PG_uptodate, &page->flags);
+ SetPageUptodate(page);
retval = count;
if (synchronous) {
}
if (retval < 0)
- clear_bit(PG_uptodate, &page->flags);
+ ClearPageUptodate(page);
}
free_write_request(req);
rpc_release_task(task);
if (WB_INVALIDATE(req))
- clear_bit(PG_uptodate, &page->flags);
+ ClearPageUptodate(page);
__free_page(page);
remove_write_request(&NFS_WRITEBACK(inode), req);
#include <linux/mm.h>
#include <linux/file.h>
#include <linux/poll.h>
+#include <linux/malloc.h>
#include <asm/uaccess.h>
static int pipe_release(struct inode * inode)
{
if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
- free_page((unsigned long) PIPE_BASE(*inode));
- PIPE_BASE(*inode) = NULL;
+ struct pipe_inode_info *info = inode->i_pipe;
+ inode->i_pipe = NULL;
+ free_page((unsigned long) info->base);
+ kfree(info);
}
wake_up_interruptible(&PIPE_WAIT(*inode));
return 0;
{
extern struct inode_operations pipe_inode_operations;
struct inode *inode = get_empty_inode();
+ unsigned long page;
- if (inode) {
- unsigned long page = __get_free_page(GFP_USER);
-
- if (!page) {
- iput(inode);
- inode = NULL;
- } else {
- PIPE_BASE(*inode) = (char *) page;
- inode->i_op = &pipe_inode_operations;
- init_waitqueue_head(&PIPE_WAIT(*inode));
- PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
- PIPE_RD_OPENERS(*inode) = PIPE_WR_OPENERS(*inode) = 0;
- PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
- PIPE_LOCK(*inode) = 0;
- /*
- * Mark the inode dirty from the very beginning,
- * that way it will never be moved to the dirty
- * list because "mark_inode_dirty()" will think
- * that it already _is_ on the dirty list.
- */
- inode->i_state = I_DIRTY;
- inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
- inode->i_uid = current->fsuid;
- inode->i_gid = current->fsgid;
- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- inode->i_blksize = PAGE_SIZE;
- }
- }
+ if (!inode)
+ goto fail_inode;
+
+ page = __get_free_page(GFP_USER);
+
+ if (!page)
+ goto fail_iput;
+
+ /* XXX */
+ inode->i_pipe = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
+ if (!inode->i_pipe)
+ goto fail_page;
+
+ PIPE_BASE(*inode) = (char *) page;
+ inode->i_op = &pipe_inode_operations;
+ init_waitqueue_head(&PIPE_WAIT(*inode));
+ PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
+ PIPE_RD_OPENERS(*inode) = PIPE_WR_OPENERS(*inode) = 0;
+ PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
+ PIPE_LOCK(*inode) = 0;
+ /*
+ * Mark the inode dirty from the very beginning,
+ * that way it will never be moved to the dirty
+ * list because "mark_inode_dirty()" will think
+ * that it already _is_ on the dirty list.
+ */
+ inode->i_state = I_DIRTY;
+ inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
+ inode->i_uid = current->fsuid;
+ inode->i_gid = current->fsgid;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ inode->i_blksize = PAGE_SIZE;
return inode;
+
+fail_page:
+ free_page(page);
+fail_iput:
+ iput(inode);
+fail_inode:
+ return NULL;
}
struct inode_operations pipe_inode_operations = {
put_unused_fd(i);
close_f12_inode:
free_page((unsigned long) PIPE_BASE(*inode));
+ kfree(inode->i_pipe);
+ inode->i_pipe = NULL;
iput(inode);
close_f12:
put_filp(f2);
len = sprintf(buffer, " total: used: free: shared: buffers: cached:\n"
"Mem: %8lu %8lu %8lu %8lu %8lu %8lu\n"
"Swap: %8lu %8lu %8lu\n",
- i.totalram, i.totalram-i.freeram, i.freeram, i.sharedram, i.bufferram, page_cache_size*PAGE_SIZE,
+ i.totalram, i.totalram-i.freeram, i.freeram, i.sharedram, i.bufferram, atomic_read(&page_cache_size)*PAGE_SIZE,
i.totalswap, i.totalswap-i.freeswap, i.freeswap);
/*
* Tagged format, for easy grepping and expansion. The above will go away
"MemFree: %8lu kB\n"
"MemShared: %8lu kB\n"
"Buffers: %8lu kB\n"
- "Cached: %8lu kB\n"
+ "Cached: %8u kB\n"
"SwapTotal: %8lu kB\n"
"SwapFree: %8lu kB\n",
i.totalram >> 10,
i.freeram >> 10,
i.sharedram >> 10,
i.bufferram >> 10,
- page_cache_size << (PAGE_SHIFT - 10),
+ atomic_read(&page_cache_size) << (PAGE_SHIFT - 10),
i.totalswap >> 10,
i.freeswap >> 10);
}
++*dirty;
if (MAP_NR(pte_page(page)) >= max_mapnr)
continue;
- if (atomic_read(&mem_map[MAP_NR(pte_page(page))].count) > 1)
+ if (page_count(mem_map + MAP_NR(pte_page(page))) > 1)
++*shared;
} while (address < end);
}
set_pte(dest_table, *src_table);
mapnr = MAP_NR(pte_page(*src_table));
if (mapnr < max_mapnr)
- atomic_inc(&mem_map[MAP_NR(pte_page(*src_table))].count);
+ get_page(mem_map + MAP_NR(pte_page(*src_table)));
stmp += PAGE_SIZE;
dtmp += PAGE_SIZE;
#include <linux/mm.h>
#include <linux/malloc.h>
#include <linux/pagemap.h>
+#include <linux/smp_lock.h>
#include <asm/uaccess.h>
#include <asm/system.h>
bytes -= copy_from_user((u8*)page_address(page) + offset, buf, bytes);
status = -EFAULT;
- if (bytes)
+ if (bytes) {
+ lock_kernel();
status = smb_updatepage(file, page, offset, bytes);
+ unlock_kernel();
+ }
return status;
}
#define __PAGE_OFFSET (PAGE_OFFSET_RAW)
+#ifndef __ASSEMBLY__
+
+#define BUG() do { printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); *(int *)0=0; } while (0)
+#define PAGE_BUG(page) do { \
+ BUG(); } while (0)
+
+#endif /* __ASSEMBLY__ */
+
#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET)
#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
extern int ext2_bmap (struct inode *, int);
extern struct buffer_head * ext2_getblk (struct inode *, long, int, int *);
+extern int ext2_getblk_block (struct inode *, long, int, int *, int *);
extern struct buffer_head * ext2_bread (struct inode *, int, int, int *);
extern int ext2_getcluster (struct inode * inode, long block);
/* public flags for file_system_type */
#define FS_REQUIRES_DEV 1
-#define FS_NO_DCACHE 2 /* Only dcache the necessary things. */
-#define FS_NO_PRELIM 4 /* prevent preloading of dentries, even if
+#define FS_NO_DCACHE 2 /* Only dcache the necessary things. */
+#define FS_NO_PRELIM 4 /* prevent preloading of dentries, even if
* FS_NO_DCACHE is not set.
*/
-#define FS_IBASKET 8 /* FS does callback to free_ibasket() if space gets low. */
+#define FS_IBASKET 8 /* FS does callback to free_ibasket() if space gets low. */
/*
* These are the fs-independent mount-flags: up to 16 flags are supported
#define S_APPEND 256 /* Append-only file */
#define S_IMMUTABLE 512 /* Immutable file */
#define MS_NOATIME 1024 /* Do not update access times. */
-#define MS_NODIRATIME 2048 /* Do not update directory access times */
+#define MS_NODIRATIME 2048 /* Do not update directory access times */
-#define MS_ODD_RENAME 32768 /* Temporary stuff; will go away as soon
+#define MS_ODD_RENAME 32768 /* Temporary stuff; will go away as soon
* as nfs_rename() will be cleaned up
*/
#define BH_Lock 2 /* 1 if the buffer is locked */
#define BH_Req 3 /* 0 if the buffer has been invalidated */
#define BH_Protected 6 /* 1 if the buffer is protected */
-
/*
* Try to keep the most commonly used fields in single cache lines (16
* bytes) to improve performance. This ordering should be
/* Non-performance-critical data follows. */
char * b_data; /* pointer to data block (1024 bytes) */
unsigned int b_list; /* List that this buffer appears */
- unsigned long b_flushtime; /* Time when this (dirty) buffer
+ unsigned long b_flushtime; /* Time when this (dirty) buffer
* should be written */
wait_queue_head_t b_wait;
struct buffer_head ** b_pprev; /* doubly linked list of hash-queue */
struct vm_area_struct *i_mmap;
struct page *i_pages;
struct dquot *i_dquot[MAXQUOTAS];
+ struct pipe_inode_info *i_pipe;
unsigned long i_state;
unsigned int i_flags;
- unsigned char i_pipe;
unsigned char i_sock;
int i_writecount;
unsigned int i_attr_flags;
__u32 i_generation;
union {
- struct pipe_inode_info pipe_i;
struct minix_inode_info minix_i;
struct ext2_inode_info ext2_i;
struct hpfs_inode_info hpfs_i;
- struct ntfs_inode_info ntfs_i;
+ struct ntfs_inode_info ntfs_i;
struct msdos_inode_info msdos_i;
struct umsdos_inode_info umsdos_i;
struct iso_inode_info isofs_i;
struct sysv_inode_info sysv_i;
struct affs_inode_info affs_i;
struct ufs_inode_info ufs_i;
- struct efs_inode_info efs_i;
+ struct efs_inode_info efs_i;
struct romfs_inode_info romfs_i;
struct coda_inode_info coda_i;
struct smb_inode_info smbfs_i;
struct hfs_inode_info hfs_i;
struct adfs_inode_info adfs_i;
- struct qnx4_inode_info qnx4_i;
+ struct qnx4_inode_info qnx4_i;
struct socket socket_i;
void *generic_ip;
} u;
extern void posix_unblock_lock(struct file_lock *);
struct fasync_struct {
- int magic;
- int fa_fd;
- struct fasync_struct *fa_next; /* singly linked list */
- struct file *fa_file;
+ int magic;
+ int fa_fd;
+ struct fasync_struct *fa_next; /* singly linked list */
+ struct file *fa_file;
};
#define FASYNC_MAGIC 0x4601
struct minix_sb_info minix_sb;
struct ext2_sb_info ext2_sb;
struct hpfs_sb_info hpfs_sb;
- struct ntfs_sb_info ntfs_sb;
+ struct ntfs_sb_info ntfs_sb;
struct msdos_sb_info msdos_sb;
struct isofs_sb_info isofs_sb;
struct nfs_sb_info nfs_sb;
struct sysv_sb_info sysv_sb;
struct affs_sb_info affs_sb;
struct ufs_sb_info ufs_sb;
- struct efs_sb_info efs_sb;
+ struct efs_sb_info efs_sb;
struct romfs_sb_info romfs_sb;
struct smb_sb_info smbfs_sb;
struct hfs_sb_info hfs_sb;
struct adfs_sb_info adfs_sb;
- struct qnx4_sb_info qnx4_sb;
+ struct qnx4_sb_info qnx4_sb;
void *generic_sbp;
} u;
/*
int (*smap) (struct inode *,int);
int (*updatepage) (struct file *, struct page *, unsigned long, unsigned int);
int (*revalidate) (struct dentry *);
+ int (*flushpage) (struct inode *, struct page *, int);
};
struct super_operations {
extern struct file *inuse_filps;
-extern void refile_buffer(struct buffer_head *);
extern void set_writetime(struct buffer_head *, int);
extern int try_to_free_buffers(struct page *);
+extern void __refile_buffer(struct buffer_head * buf);
+extern inline void refile_buffer(struct buffer_head * buf)
+{
+ /*
+ * Subtle, we do not want to refile not hashed buffers ...
+ */
+ if (buf->b_pprev)
+ __refile_buffer(buf);
+}
-extern int nr_buffers;
extern int buffermem;
-extern int nr_buffer_heads;
#define BUF_CLEAN 0
#define BUF_LOCKED 1 /* Buffers scheduled for write */
extern int brw_page(int, struct page *, kdev_t, int [], int, int);
typedef long (*writepage_t)(struct file *, struct page *, unsigned long, unsigned long, const char *);
+typedef int (*fs_getblock_t)(struct inode *, long, int, int *, int *);
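+/*
+ * fs_getblock_t matches ext2_getblk_block(): (inode, block, create,
+ * &err, &created), returning the physical block number or 0.
+ */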
+
extern int generic_readpage(struct file *, struct page *);
extern int generic_file_mmap(struct file *, struct vm_area_struct *);
extern ssize_t generic_file_read(struct file *, char *, size_t, loff_t *);
extern ssize_t generic_file_write(struct file *, const char *, size_t, loff_t *, writepage_t);
+extern int generic_block_flushpage(struct inode *, struct page *, int);
+extern long block_write_one_page (struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf, fs_getblock_t fs_get_block);
extern struct super_block *get_super(kdev_t);
extern void put_super(kdev_t);
#ifndef _HPFS_FS_I
#define _HPFS_FS_I
-#if ANALWARNINGS
-#warning Fix the FIFO stuff!
-#warning Fix the FIFO stuff!
-#warning Fix the FIFO stuff!
-#endif
-
struct hpfs_inode_info {
- union { /* Linux sometimes destroys this structure */
- struct pipe_inode_info bla; /* due to a bug. Linus doesn't want to fix */
- struct socket ble; /* it so I had to write this workaround :-) */
- } dummy;
ino_t i_parent_dir; /* (directories) gives fnode of parent dir */
unsigned i_dno; /* (directories) root dnode */
unsigned i_dpos; /* (directories) temp for readdir */
wait_queue_head_t wait;
struct page **pprev_hash;
struct buffer_head * buffers;
+ int owner; /* temporary debugging check */
} mem_map_t;
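+
+/*
+ * Page reference-count wrappers: callers should use these rather than
+ * touching page->count directly.
+ */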
+#define get_page(p) do { atomic_inc(&(p)->count); \
+ } while (0)
+#define put_page(p) __free_page(p)
+#define put_page_testzero(p) ({ int __ret = atomic_dec_and_test(&(p)->count);\
+ __ret; })
+#define page_count(p) atomic_read(&(p)->count)
+#define set_page_count(p,v) do { atomic_set(&(p)->count, v); \
+ } while (0)
+
/* Page flag bit values */
#define PG_locked 0
#define PG_error 1
#define PG_referenced 2
-#define PG_dirty 3
-#define PG_uptodate 4
-#define PG_free_after 5
-#define PG_decr_after 6
-#define PG_swap_unlock_after 7
-#define PG_DMA 8
-#define PG_Slab 9
-#define PG_swap_cache 10
-#define PG_skip 11
+#define PG_uptodate 3
+#define PG_free_after 4
+#define PG_decr_after 5
+#define PG_swap_unlock_after 6
+#define PG_DMA 7
+#define PG_Slab 8
+#define PG_swap_cache 9
+#define PG_skip 10
+	/* bits 11-30 unused */
#define PG_reserved 31
+
/* Make it prettier to test the above... */
+#define Page_Uptodate(page) (test_bit(PG_uptodate, &(page)->flags))
+#define SetPageUptodate(page) do { set_bit(PG_uptodate, &(page)->flags); \
+ } while (0)
+#define ClearPageUptodate(page) do { clear_bit(PG_uptodate, &(page)->flags); \
+ } while (0)
#define PageLocked(page) (test_bit(PG_locked, &(page)->flags))
+#define LockPage(page) \
+ do { int _ret = test_and_set_bit(PG_locked, &(page)->flags); \
+ if (_ret) PAGE_BUG(page); \
+ page->owner = (int)current; } while (0)
+#define TryLockPage(page) ({ int _ret = test_and_set_bit(PG_locked, &(page)->flags); \
+ if (!_ret) page->owner = (int)current; _ret; })
+#define UnlockPage(page)	do { \
+		if (page->owner != (int)current) \
+			BUG(); \
+		page->owner = 0; \
+		if (!test_and_clear_bit(PG_locked, &(page)->flags)) \
+			BUG(); \
+		wake_up(&page->wait); \
+	} while (0)
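+/*
+ * page->owner (the temporary debugging field in mem_map_t) records the
+ * task that locked the page; only that task may unlock it.
+ */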
#define PageError(page) (test_bit(PG_error, &(page)->flags))
+#define SetPageError(page) ({ int _ret = test_and_set_bit(PG_error, &(page)->flags); _ret; })
+#define ClearPageError(page) do { if (!test_and_clear_bit(PG_error, &(page)->flags)) BUG(); } while (0)
#define PageReferenced(page) (test_bit(PG_referenced, &(page)->flags))
-#define PageDirty(page) (test_bit(PG_dirty, &(page)->flags))
-#define PageUptodate(page) (test_bit(PG_uptodate, &(page)->flags))
#define PageFreeAfter(page) (test_bit(PG_free_after, &(page)->flags))
#define PageDecrAfter(page) (test_bit(PG_decr_after, &(page)->flags))
#define PageSwapUnlockAfter(page) (test_bit(PG_swap_unlock_after, &(page)->flags))
#define PageSetSlab(page) (set_bit(PG_Slab, &(page)->flags))
#define PageSetSwapCache(page) (set_bit(PG_swap_cache, &(page)->flags))
-#define PageTestandSetDirty(page) \
- (test_and_set_bit(PG_dirty, &(page)->flags))
#define PageTestandSetSwapCache(page) \
(test_and_set_bit(PG_swap_cache, &(page)->flags))
#define PageClearSlab(page) (clear_bit(PG_Slab, &(page)->flags))
#define PageClearSwapCache(page)(clear_bit(PG_swap_cache, &(page)->flags))
-#define PageTestandClearDirty(page) \
- (test_and_clear_bit(PG_dirty, &(page)->flags))
#define PageTestandClearSwapCache(page) \
(test_and_clear_bit(PG_swap_cache, &(page)->flags))
#define buffer_under_min() ((buffermem >> PAGE_SHIFT) * 100 < \
buffer_mem.min_percent * num_physpages)
-#define pgcache_under_min() (page_cache_size * 100 < \
+#define pgcache_under_min() (atomic_read(&page_cache_size) * 100 < \
page_cache.min_percent * num_physpages)
#endif /* __KERNEL__ */
#ifndef _MSDOS_FS_I
#define _MSDOS_FS_I
-#ifndef _LINUX_PIPE_FS_I_H
-#include <linux/pipe_fs_i.h>
-#endif
-
/*
* MS-DOS file system inode data in memory
*/
struct msdos_inode_info {
- /*
- UMSDOS manage special file and fifo as normal empty
- msdos file. fifo inode processing conflict with msdos
- processing. So I insert the pipe_inode_info so the
- information does not overlap. This increases the size of
- the msdos_inode_info, but the clear winner here is
- the ext2_inode_info. So it does not change anything to
- the total size of a struct inode.
-
- I have not put it conditional. With the advent of loadable
- file system drivers, it would be very easy to compile
- a MS-DOS FS driver unaware of UMSDOS and then later to
- load a (then incompatible) UMSDOS FS driver.
- */
- struct pipe_inode_info reserved;
int i_start; /* first cluster or 0 */
int i_logstart; /* logical first cluster */
int i_attrs; /* unused attribute bits */
* nfs fs inode data in memory
*/
struct nfs_inode_info {
- /*
- * This is a place holder so named pipes on NFS filesystems
- * work (more or less correctly). This must be first in the
- * struct because the data is really accessed via inode->u.pipe_i.
- */
- struct pipe_inode_info pipeinfo;
-
/*
* Various flags
*/
*/
#define page_cache_entry(x) (mem_map + MAP_NR(x))
-#define PAGE_HASH_BITS 12
+#define PAGE_HASH_BITS 16
#define PAGE_HASH_SIZE (1 << PAGE_HASH_BITS)
-extern unsigned long page_cache_size; /* # of pages currently in the hash table */
+extern atomic_t page_cache_size; /* # of pages currently in the hash table */
extern struct page * page_hash_table[PAGE_HASH_SIZE];
/*
#define page_hash(inode,offset) (page_hash_table+_page_hashfn(inode,offset))
-static inline struct page * __find_page(struct inode * inode, unsigned long offset, struct page *page)
-{
- goto inside;
- for (;;) {
- page = page->next_hash;
-inside:
- if (!page)
- goto not_found;
- if (page->inode != inode)
- continue;
- if (page->offset == offset)
- break;
- }
- /* Found the page. */
- atomic_inc(&page->count);
- set_bit(PG_referenced, &page->flags);
-not_found:
- return page;
-}
-
-static inline struct page *find_page(struct inode * inode, unsigned long offset)
-{
- return __find_page(inode, offset, *page_hash(inode, offset));
-}
+extern struct page * __find_get_page (struct inode * inode,
+ unsigned long offset, struct page *page);
+#define find_get_page(inode, offset) \
+ __find_get_page(inode, offset, *page_hash(inode, offset))
+extern struct page * __find_lock_page (struct inode * inode,
+ unsigned long offset, struct page *page);
+#define find_lock_page(inode, offset) \
+ __find_lock_page(inode, offset, *page_hash(inode, offset))
-static inline void remove_page_from_hash_queue(struct page * page)
-{
- if(page->pprev_hash) {
- if(page->next_hash)
- page->next_hash->pprev_hash = page->pprev_hash;
- *page->pprev_hash = page->next_hash;
- page->pprev_hash = NULL;
- }
- page_cache_size--;
-}
+extern void __add_page_to_hash_queue(struct page * page, struct page **p);
-static inline void __add_page_to_hash_queue(struct page * page, struct page **p)
-{
- page_cache_size++;
- if((page->next_hash = *p) != NULL)
- (*p)->pprev_hash = &page->next_hash;
- *p = page;
- page->pprev_hash = p;
-}
+extern int add_to_page_cache_unique(struct page * page, struct inode * inode, unsigned long offset, struct page **hash);
static inline void add_page_to_hash_queue(struct page * page, struct inode * inode, unsigned long offset)
{
{
struct inode * inode = page->inode;
- page->inode = NULL;
inode->i_nrpages--;
if (inode->i_pages == page)
inode->i_pages = page->next;
*p = page;
}
-extern void __wait_on_page(struct page *);
+extern void ___wait_on_page(struct page *);
+
static inline void wait_on_page(struct page * page)
{
if (PageLocked(page))
- __wait_on_page(page);
+ ___wait_on_page(page);
}
extern void update_vm_cache(struct inode *, unsigned long, const char *, int);
unsigned int writers;
};
-#define PIPE_WAIT(inode) ((inode).u.pipe_i.wait)
-#define PIPE_BASE(inode) ((inode).u.pipe_i.base)
-#define PIPE_START(inode) ((inode).u.pipe_i.start)
+#define PIPE_WAIT(inode) ((inode).i_pipe->wait)
+#define PIPE_BASE(inode) ((inode).i_pipe->base)
+#define PIPE_START(inode) ((inode).i_pipe->start)
#define PIPE_LEN(inode) ((inode).i_size)
-#define PIPE_RD_OPENERS(inode) ((inode).u.pipe_i.rd_openers)
-#define PIPE_WR_OPENERS(inode) ((inode).u.pipe_i.wr_openers)
-#define PIPE_READERS(inode) ((inode).u.pipe_i.readers)
-#define PIPE_WRITERS(inode) ((inode).u.pipe_i.writers)
-#define PIPE_LOCK(inode) ((inode).u.pipe_i.lock)
+#define PIPE_RD_OPENERS(inode) ((inode).i_pipe->rd_openers)
+#define PIPE_WR_OPENERS(inode) ((inode).i_pipe->wr_openers)
+#define PIPE_READERS(inode) ((inode).i_pipe->readers)
+#define PIPE_WRITERS(inode) ((inode).i_pipe->writers)
+#define PIPE_LOCK(inode) ((inode).i_pipe->lock)
#define PIPE_SIZE(inode) PIPE_LEN(inode)
#define PIPE_EMPTY(inode) (PIPE_SIZE(inode)==0)
gid_t gid,egid,sgid,fsgid;
int ngroups;
gid_t groups[NGROUPS];
- kernel_cap_t cap_effective, cap_inheritable, cap_permitted;
+ kernel_cap_t cap_effective, cap_inheritable, cap_permitted;
struct user_struct *user;
/* limits */
struct rlimit rlim[RLIM_NLIMITS];
#else
if (cap_is_fs_cap(cap) ? current->fsuid == 0 : current->euid == 0)
#endif
- {
+ {
current->flags |= PF_SUPERPRIV;
return 1;
}
extern int nr_free_pages;
extern atomic_t nr_async_pages;
extern struct inode swapper_inode;
-extern unsigned long page_cache_size;
+extern atomic_t page_cache_size;
extern int buffermem;
/* Incomplete types for prototype declarations: */
unsigned int count;
if (PageReserved(page))
return 1;
- count = atomic_read(&page->count);
+ count = page_count(page);
if (PageSwapCache(page))
count += swap_count(page->offset) - 2;
if (PageFreeAfter(page))
/*
* SyncLink Multiprotocol Serial Adapter Driver
*
+ * ==FILEDATE 19990523==
+ *
* Copyright (C) 1998 by Microgate Corporation
*
* Redistribution of this file is permitted under
#define HDLC_FLAG_AUTO_RTS 0x0080
#define HDLC_FLAG_RXC_DPLL 0x0100
#define HDLC_FLAG_RXC_BRG 0x0200
+#define HDLC_FLAG_RXC_TXCPIN 0x8000
+#define HDLC_FLAG_RXC_RXCPIN 0x0000
#define HDLC_FLAG_TXC_DPLL 0x0400
#define HDLC_FLAG_TXC_BRG 0x0800
+#define HDLC_FLAG_TXC_TXCPIN 0x0000
+#define HDLC_FLAG_TXC_RXCPIN 0x0008
#define HDLC_FLAG_DPLL_DIV8 0x1000
#define HDLC_FLAG_DPLL_DIV16 0x2000
#define HDLC_FLAG_DPLL_DIV32 0x0000
+#define HDLC_FLAG_HDLC_LOOPMODE 0x4000
#define HDLC_CRC_NONE 0
#define HDLC_CRC_16_CCITT 1
#define HDLC_ENCODING_NRZB 1
#define HDLC_ENCODING_NRZI_MARK 2
#define HDLC_ENCODING_NRZI_SPACE 3
+#define HDLC_ENCODING_NRZI HDLC_ENCODING_NRZI_SPACE
#define HDLC_ENCODING_BIPHASE_MARK 4
#define HDLC_ENCODING_BIPHASE_SPACE 5
#define HDLC_ENCODING_BIPHASE_LEVEL 6
* MGSL_IOCTXABORT abort transmitting frame (HDLC)
* MGSL_IOCGSTATS return current statistics
* MGSL_IOCWAITEVENT wait for specified event to occur
+ * MGSL_LOOPTXDONE transmit in HDLC LoopMode done
*/
#define MGSL_MAGIC_IOC 'm'
-#define MGSL_IOCSPARAMS _IOW(MGSL_MAGIC_IOC,0,sizeof(MGSL_PARAMS))
-#define MGSL_IOCGPARAMS _IOR(MGSL_MAGIC_IOC,1,sizeof(MGSL_PARAMS))
+#define MGSL_IOCSPARAMS _IOW(MGSL_MAGIC_IOC,0,struct _MGSL_PARAMS)
+#define MGSL_IOCGPARAMS _IOR(MGSL_MAGIC_IOC,1,struct _MGSL_PARAMS)
#define MGSL_IOCSTXIDLE _IO(MGSL_MAGIC_IOC,2)
#define MGSL_IOCGTXIDLE _IO(MGSL_MAGIC_IOC,3)
#define MGSL_IOCTXENABLE _IO(MGSL_MAGIC_IOC,4)
#define MGSL_IOCRXENABLE _IO(MGSL_MAGIC_IOC,5)
#define MGSL_IOCTXABORT _IO(MGSL_MAGIC_IOC,6)
#define MGSL_IOCGSTATS _IO(MGSL_MAGIC_IOC,7)
-#define MGSL_IOCWAITEVENT _IO(MGSL_MAGIC_IOC,8)
+#define MGSL_IOCWAITEVENT _IOWR(MGSL_MAGIC_IOC,8,int)
#define MGSL_IOCCLRMODCOUNT _IO(MGSL_MAGIC_IOC,15)
+#define MGSL_IOCLOOPTXDONE _IO(MGSL_MAGIC_IOC,9)
#endif /* _SYNCLINK_H_ */
*
* For directory, we also have a reference to the inode of its
* own EMD file. Also, we have dir_locking_info to help synchronise
- * file creation and file lookup. This data is sharing space with
- * the pipe_inode_info not used by directory. See also msdos_fs_i.h
- * for more information about pipe_inode_info and msdos_inode_info.
+ * file creation and file lookup. See also msdos_fs_i.h for more
+ * information about msdos_inode_info.
*
* Special file and fifo do have an inode which correspond to an
* empty MSDOS file.
* symlink are processed mostly like regular file. The content is the
* link.
*
- * fifos add there own extension to the inode. I have reserved some
- * space for fifos side by side with msdos_inode_info. This is just
- * to for the show, because msdos_inode_info already include the
- * pipe_inode_info.
- *
* The UMSDOS specific extension is placed after the union.
*/
struct umsdos_inode_info {
union {
struct msdos_inode_info msdos_info;
- struct pipe_inode_info pipe_info;
struct dir_locking_info dir_info;
} u;
int i_patched; /* Inode has been patched */
done: /* pte_val(pte) == shp->shm_pages[idx] */
current->min_flt++;
- atomic_inc(&mem_map[MAP_NR(pte_page(pte))].count);
+ get_page(mem_map + MAP_NR(pte_page(pte)));
return pte_page(pte);
}
swap_free (swap_nr);
return 0;
}
- if (atomic_read(&mem_map[MAP_NR(pte_page(page))].count) != 1)
+ if (page_count(mem_map + MAP_NR(pte_page(page))) != 1)
goto check_table;
shp->shm_pages[idx] = swap_nr;
rw_swap_page_nocache (WRITE, swap_nr, (char *) pte_page(page));
pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
shp->shm_pages[idx] = pte_val(pte);
- atomic_inc(&mem_map[MAP_NR(page)].count);
+ get_page(mem_map + MAP_NR(page));
shm_rss++;
swap_free(entry);
EXPORT_SYMBOL(remap_page_range);
EXPORT_SYMBOL(max_mapnr);
EXPORT_SYMBOL(high_memory);
-EXPORT_SYMBOL(update_vm_cache);
EXPORT_SYMBOL(vmtruncate);
EXPORT_SYMBOL(find_vma);
EXPORT_SYMBOL(get_unmapped_area);
EXPORT_SYMBOL(posix_block_lock);
EXPORT_SYMBOL(posix_unblock_lock);
EXPORT_SYMBOL(dput);
-EXPORT_SYMBOL(get_cached_page);
EXPORT_SYMBOL(put_cached_page);
EXPORT_SYMBOL(is_root_busy);
EXPORT_SYMBOL(prune_dcache);
EXPORT_SYMBOL(__wait_on_super);
EXPORT_SYMBOL(file_fsync);
EXPORT_SYMBOL(clear_inode);
-EXPORT_SYMBOL(refile_buffer);
EXPORT_SYMBOL(nr_async_pages);
EXPORT_SYMBOL(___strtok);
EXPORT_SYMBOL(init_special_inode);
/*
* linux/mm/filemap.c
*
- * Copyright (C) 1994, 1995 Linus Torvalds
+ * Copyright (C) 1994-1999 Linus Torvalds
*/
/*
* though.
*
* Shared mappings now work. 15.8.1995 Bruno.
+ *
+ * finished 'unifying' the page and buffer cache and SMP-threaded the
+ * page-cache, 21.05.1999, Ingo Molnar <mingo@redhat.com>
*/
-unsigned long page_cache_size = 0;
+atomic_t page_cache_size = ATOMIC_INIT(0);
struct page * page_hash_table[PAGE_HASH_SIZE];
/*
static kmem_cache_t *pio_request_cache;
static DECLARE_WAIT_QUEUE_HEAD(pio_wait);
+spinlock_t pagecache_lock = SPIN_LOCK_UNLOCKED;
+
static inline void
make_pio_request(struct file *, unsigned long, unsigned long);
+void __add_page_to_hash_queue(struct page * page, struct page **p)
+{
+ atomic_inc(&page_cache_size);
+ if((page->next_hash = *p) != NULL)
+ (*p)->pprev_hash = &page->next_hash;
+ *p = page;
+ page->pprev_hash = p;
+ if (page->buffers)
+ PAGE_BUG(page);
+}
+
+static void remove_page_from_hash_queue(struct page * page)
+{
+ if(page->pprev_hash) {
+ if(page->next_hash)
+ page->next_hash->pprev_hash = page->pprev_hash;
+ *page->pprev_hash = page->next_hash;
+ page->pprev_hash = NULL;
+ }
+ atomic_dec(&page_cache_size);
+}
+
-/*
- * Invalidate the pages of an inode, removing all pages that aren't
- * locked down (those are sure to be up-to-date anyway, so we shouldn't
- * invalidate them).
- */
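+/*
+ * Invalidate the pages of an inode: each page is locked and dropped
+ * from the cache; a page we cannot lock at once is waited on and the
+ * scan restarts from the top.
+ */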
void invalidate_inode_pages(struct inode * inode)
{
struct page ** p;
struct page * page;
+repeat:
+ spin_lock(&pagecache_lock);
p = &inode->i_pages;
while ((page = *p) != NULL) {
- if (PageLocked(page)) {
- p = &page->next;
- continue;
+ get_page(page);
+ if (TryLockPage(page)) {
+ spin_unlock(&pagecache_lock);
+ wait_on_page(page);
+ page_cache_release(page);
+ goto repeat;
}
+ if (page_count(page) != 2)
+		printk("hm, busy page invalidated? (not necessarily a bug)\n");
inode->i_nrpages--;
if ((*p = page->next) != NULL)
(*p)->prev = page->prev;
page->prev = NULL;
remove_page_from_hash_queue(page);
page->inode = NULL;
+ UnlockPage(page);
+ page_cache_release(page);
page_cache_release(page);
- continue;
+
}
+ spin_unlock(&pagecache_lock);
}
-
/*
* Truncate the page cache at a set offset, removing the pages
* that are beyond that offset (and zeroing out partial pages).
{
struct page ** p;
struct page * page;
+ int partial = 0;
repeat:
+ spin_lock(&pagecache_lock);
p = &inode->i_pages;
while ((page = *p) != NULL) {
unsigned long offset = page->offset;
/* page wholly truncated - free it */
if (offset >= start) {
- if (PageLocked(page)) {
+ get_page(page);
+ if (TryLockPage(page)) {
+ spin_unlock(&pagecache_lock);
wait_on_page(page);
+ page_cache_release(page);
goto repeat;
}
+ if (page_count(page) != 2)
+				printk("hm, busy page truncated? (not necessarily a bug)\n");
+ spin_unlock(&pagecache_lock);
+
+ if (inode->i_op->flushpage)
+ inode->i_op->flushpage(inode, page, 0);
+
+ /*
+ * We remove the page from the page cache
+ * _after_ we have destroyed all buffer-cache
+ * references to it. Otherwise some other process
+ * might think this inode page is not in the
+ * page cache and creates a buffer-cache alias
+ * to it causing all sorts of fun problems ...
+ */
+ spin_lock(&pagecache_lock);
inode->i_nrpages--;
if ((*p = page->next) != NULL)
(*p)->prev = page->prev;
page->prev = NULL;
remove_page_from_hash_queue(page);
page->inode = NULL;
+
+ if (page_count(page) != 2)
+				printk("hm, busy page truncated? (not necessarily a bug)\n");
+ spin_unlock(&pagecache_lock);
+
+ UnlockPage(page);
page_cache_release(page);
- continue;
+ page_cache_release(page);
+
+ /*
+ * We have done things without the pagecache lock,
+ * so we'll have to repeat the scan.
+ * It's not possible to deadlock here because
+		 * we are guaranteed to make progress (i.e. we have
+		 * just removed a page).
+ */
+ goto repeat;
}
p = &page->next;
+ /*
+ * there is only one partial page possible.
+ */
+ if (partial)
+ continue;
+
offset = start - offset;
/* partial truncate, clear end of page */
if (offset < PAGE_CACHE_SIZE) {
- unsigned long address = page_address(page);
+ unsigned long address;
+ /*
+ * It's worth dropping the write lock only at
+ * this point. We are holding the page lock
+ * so nobody can do anything bad to us.
+ */
+ spin_unlock(&pagecache_lock);
+ partial = 1;
+
+ address = page_address(page);
memset((void *) (offset + address), 0, PAGE_CACHE_SIZE - offset);
flush_page_to_ram(address);
+ /*
+ * we have dropped the lock so we have to
+ * restart.
+ */
+ goto repeat;
}
}
+ spin_unlock(&pagecache_lock);
}
/*
- * Remove a page from the page cache and free it.
+ * Remove a page from the page cache and free it. Caller has to make
+ * sure the page is locked and that nobody else uses it - or that usage
+ * is safe.
*/
void remove_inode_page(struct page *page)
{
- remove_page_from_hash_queue(page);
+ struct inode *inode = page->inode;
+
+ if (!PageLocked(page))
+ PAGE_BUG(page);
+
+ /*
+ * We might sleep here. Other processes might arrive and sleep on
+ * the lock, but nobody is allowed to 'cross' the lock and get a
+ * reference to the page. We then remove the page from the hash
+	 * before unlocking it. This mechanism ensures that 1) nobody gets
+ * a half-freed page 2) nobody creates the same pagecache content
+ * before we finish destroying this page. This is not a
+ * performance problem as pages here are candidates for getting
+ * freed, ie. it's supposed to be unlikely that the above situation
+ * happens.
+ */
+ if (inode->i_op->flushpage)
+ inode->i_op->flushpage(inode, page, 1);
+
+ spin_lock(&pagecache_lock);
remove_page_from_inode_queue(page);
- page_cache_release(page);
+ remove_page_from_hash_queue(page);
+ page->inode = NULL;
+ spin_unlock(&pagecache_lock);
}
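+
+/*
+ * Example caller: shrink_mmap() below takes the page lock with
+ * TryLockPage() and makes sure nobody else holds a reference before
+ * calling remove_inode_page().
+ */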
int shrink_mmap(int priority, int gfp_mask)
static unsigned long clock = 0;
unsigned long limit = num_physpages;
struct page * page;
- int count;
+ int count, err;
count = limit >> priority;
continue;
/* We can't free pages unless there's just one user */
- if (atomic_read(&page->count) != 1)
+ if (page_count(page) != 1)
continue;
count--;
if (referenced && swap_count(page->offset) != 1)
continue;
delete_from_swap_cache(page);
- return 1;
+ err = 1;
+ goto out;
}
if (referenced)
continue;
+ /* is it a page-cache page? */
+ spin_lock(&pagecache_lock);
+ if (page->inode) {
+ if (pgcache_under_min())
+ goto unlock_continue;
+ if (TryLockPage(page))
+ goto unlock_continue;
+ if (page_count(page) != 1) {
+ UnlockPage(page);
+ goto unlock_continue;
+ }
+ spin_unlock(&pagecache_lock);
+
+ remove_inode_page(page);
+ UnlockPage(page);
+ page_cache_release(page);
+ err = 1;
+ goto out;
+unlock_continue:
+ spin_unlock(&pagecache_lock);
+ continue;
+ }
+ spin_unlock(&pagecache_lock);
+
/* Is it a buffer page? */
if (page->buffers) {
if (buffer_under_min())
continue;
if (!try_to_free_buffers(page))
continue;
- return 1;
- }
-
- /* is it a page-cache page? */
- if (page->inode) {
- if (pgcache_under_min())
- continue;
- remove_inode_page(page);
- return 1;
+ err = 1;
+ goto out;
}
} while (count > 0);
- return 0;
+ err = 0;
+out:
+ return err;
}
-/*
- * Update a page cache copy, when we're doing a "write()" system call
- * See also "update_vm_cache()".
- */
-void update_vm_cache(struct inode * inode, unsigned long pos, const char * buf, int count)
+static inline struct page * __find_page_nolock(struct inode * inode, unsigned long offset, struct page *page)
{
- unsigned long offset, len;
+ goto inside;
- offset = (pos & ~PAGE_CACHE_MASK);
- pos = pos & PAGE_CACHE_MASK;
- len = PAGE_CACHE_SIZE - offset;
- do {
- struct page * page;
-
- if (len > count)
- len = count;
- page = find_page(inode, pos);
- if (page) {
- wait_on_page(page);
- memcpy((void *) (offset + page_address(page)), buf, len);
- page_cache_release(page);
- }
- count -= len;
- buf += len;
- len = PAGE_CACHE_SIZE;
- offset = 0;
- pos += PAGE_CACHE_SIZE;
- } while (count);
+ for (;;) {
+ page = page->next_hash;
+inside:
+ if (!page)
+ goto not_found;
+ if (page->inode != inode)
+ continue;
+ if (page->offset == offset)
+ break;
+ }
+not_found:
+ return page;
}
-static inline void add_to_page_cache(struct page * page,
+/*
+ * This adds a page to the page cache, starting out as locked,
+ * owned by us, referenced, but not uptodate and with no errors.
+ */
+static inline void __add_to_page_cache(struct page * page,
struct inode * inode, unsigned long offset,
struct page **hash)
{
- atomic_inc(&page->count);
- page->flags = (page->flags & ~((1 << PG_uptodate) | (1 << PG_error))) | (1 << PG_referenced);
+ unsigned long flags;
+
+ flags = page->flags & ~((1 << PG_uptodate) | (1 << PG_error));
+ page->flags = flags | ((1 << PG_locked) | (1 << PG_referenced));
+ page->owner = (int)current; /* REMOVEME */
+ get_page(page);
page->offset = offset;
add_page_to_inode_queue(inode, page);
__add_page_to_hash_queue(page, hash);
}
+int add_to_page_cache_unique(struct page * page,
+ struct inode * inode, unsigned long offset,
+ struct page **hash)
+{
+ int err;
+ struct page *alias;
+
+ spin_lock(&pagecache_lock);
+ alias = __find_page_nolock(inode, offset, *hash);
+
+ err = 1;
+ if (!alias) {
+ __add_to_page_cache(page,inode,offset,hash);
+ err = 0;
+ }
+
+ spin_unlock(&pagecache_lock);
+ return err;
+}
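+
+/*
+ * Typical caller pattern (cf. the readahead path below):
+ *
+ *	page = page_cache_entry(page_cache);
+ *	if (add_to_page_cache_unique(page, inode, offset, hash)) {
+ *		page_cache_release(page);
+ *		goto repeat;
+ *	}
+ */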
+
/*
* Try to read ahead in the file. "page_cache" is a potentially free page
* that we could use for the cache (if it is 0 we can try to create one,
if (offset >= inode->i_size)
break;
hash = page_hash(inode, offset);
- page = __find_page(inode, offset, *hash);
- if (!page) {
+ page = page_cache_entry(page_cache);
+ if (!add_to_page_cache_unique(page, inode, offset, hash)) {
/*
- * Ok, add the new page to the hash-queues...
+ * We do not have to check the return value here
+ * because it's a readahead.
*/
- page = page_cache_entry(page_cache);
- add_to_page_cache(page, inode, offset, hash);
+ lock_kernel();
inode->i_op->readpage(file, page);
+ unlock_kernel();
page_cache = 0;
+ page_cache_release(page);
}
- page_cache_release(page);
}
return page_cache;
}
/*
- * Wait for IO to complete on a locked page.
+ * Wait for a page to get unlocked.
*
* This must be called with the caller "holding" the page,
* ie with increased "page->count" so that the page won't
* go away during the wait..
*/
-void __wait_on_page(struct page *page)
+void ___wait_on_page(struct page *page)
{
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
tsk->state = TASK_UNINTERRUPTIBLE;
run_task_queue(&tq_disk);
if (PageLocked(page)) {
- schedule();
+ int left;
+ left = schedule_timeout(HZ*20);
+ if (!left)
+ PAGE_BUG(page);
goto repeat;
}
tsk->state = TASK_RUNNING;
remove_wait_queue(&page->wait, &wait);
}
+/*
+ * Get an exclusive lock on the page..
+ */
+static void lock_page(struct page *page)
+{
+ if (TryLockPage(page)) {
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, current);
+
+ run_task_queue(&tq_disk);
+ add_wait_queue(&page->wait, &wait);
+ tsk->state = TASK_UNINTERRUPTIBLE;
+
+ while (TryLockPage(page)) {
+ schedule();
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ }
+
+ remove_wait_queue(&page->wait, &wait);
+ tsk->state = TASK_RUNNING;
+ }
+}
+
+
+/*
+ * a rather lightweight function, finding and getting a reference to a
+ * hashed page atomically, waiting for it if it's locked.
+ */
+struct page * __find_get_page (struct inode * inode,
+ unsigned long offset, struct page *page)
+{
+ /*
+ * We scan the hash list read-only. Addition to and removal from
+ * the hash-list needs a held write-lock.
+ */
+repeat:
+ spin_lock(&pagecache_lock);
+ page = __find_page_nolock(inode, offset, page);
+ if (page)
+ get_page(page);
+ spin_unlock(&pagecache_lock);
+
+ /* Found the page, sleep if locked. */
+ if (page && PageLocked(page)) {
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+
+ add_wait_queue(&page->wait, &wait);
+ tsk->state = TASK_UNINTERRUPTIBLE;
+
+ run_task_queue(&tq_disk);
+ if (PageLocked(page))
+ schedule();
+ tsk->state = TASK_RUNNING;
+ remove_wait_queue(&page->wait, &wait);
+
+ /*
+ * The page might have been unhashed meanwhile. It's
+ * not freed though because we hold a reference to it.
+ * If this is the case then it will be freed _here_,
+ * and we recheck the hash anyway.
+ */
+ page_cache_release(page);
+ goto repeat;
+ }
+ /*
+ * It's not locked so we can return the page and we hold
+ * a reference to it.
+ */
+ return page;
+}
+
+/*
+ * Get the lock to a page atomically.
+ */
+struct page * __find_lock_page (struct inode * inode,
+ unsigned long offset, struct page *page)
+{
+ int locked;
+
+ /*
+ * We scan the hash list read-only. Addition to and removal from
+ * the hash-list needs a held write-lock.
+ */
+repeat:
+ spin_lock(&pagecache_lock);
+ page = __find_page_nolock(inode, offset, page);
+ locked = 0;
+ if (page) {
+ get_page(page);
+ if (TryLockPage(page))
+ locked = 1;
+ }
+ spin_unlock(&pagecache_lock);
+
+ /* Found the page, sleep if locked. */
+ if (page && locked) {
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+
+ add_wait_queue(&page->wait, &wait);
+ tsk->state = TASK_UNINTERRUPTIBLE;
+
+ run_task_queue(&tq_disk);
+ if (PageLocked(page))
+ schedule();
+ tsk->state = TASK_RUNNING;
+ remove_wait_queue(&page->wait, &wait);
+
+ /*
+ * The page might have been unhashed meanwhile. It's
+ * not freed though because we hold a reference to it.
+ * If this is the case then it will be freed _here_,
+ * and we recheck the hash anyway.
+ */
+ page_cache_release(page);
+ goto repeat;
+ }
+ /*
+ * It's not locked so we can return the page and we hold
+ * a reference to it.
+ */
+ return page;
+}
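+/*
+ * Unlike __find_get_page() above, a successful return here leaves the
+ * page locked as well as referenced: the caller must do UnlockPage()
+ * and page_cache_release() itself, as the generic_file_write() path
+ * below does.
+ */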
+
#if 0
#define PROFILE_READAHEAD
#define DEBUG_READAHEAD
* -------------------
* The read ahead context fields of the "struct file" are the following:
* - f_raend : position of the first byte after the last page we tried to
- * read ahead.
+ * read ahead.
* - f_ramax : current read-ahead maximum size.
* - f_ralen : length of the current IO read block we tried to read-ahead.
* - f_rawin : length of the current read-ahead window.
- * if last read-ahead was synchronous then
- * f_rawin = f_ralen
- * otherwise (was asynchronous)
- * f_rawin = previous value of f_ralen + f_ralen
+ * if last read-ahead was synchronous then
+ * f_rawin = f_ralen
+ * otherwise (was asynchronous)
+ * f_rawin = previous value of f_ralen + f_ralen
*
* Read-ahead limits:
* ------------------
* We will later force unplug device in order to force asynchronous read IO.
*/
else if (reada_ok && filp->f_ramax && raend >= PAGE_CACHE_SIZE &&
- ppos <= raend && ppos + filp->f_ralen >= raend) {
+ ppos <= raend && ppos + filp->f_ralen >= raend) {
/*
* Add ONE page to max_ahead in order to try to have about the same IO max size
* as synchronous read-ahead (MAX_READAHEAD + 1)*PAGE_CACHE_SIZE.
struct inode *inode = dentry->d_inode;
size_t pos, pgpos, page_cache;
int reada_ok;
+ int error;
int max_readahead = get_max_readahead(inode);
page_cache = 0;
* Try to find the data in the page cache..
*/
hash = page_hash(inode, pos & PAGE_CACHE_MASK);
- page = __find_page(inode, pos & PAGE_CACHE_MASK, *hash);
+
+ spin_lock(&pagecache_lock);
+ page = __find_page_nolock(inode, pos & PAGE_CACHE_MASK, *hash);
if (!page)
goto no_cached_page;
-
found_page:
-/*
- * Try to read ahead only if the current page is filled or being filled.
- * Otherwise, if we were reading ahead, decrease max read ahead size to
- * the minimum value.
- * In this context, that seems to may happen only on some read error or if
- * the page has been rewritten.
- */
- if (PageUptodate(page) || PageLocked(page))
- page_cache = generic_file_readahead(reada_ok, filp, inode, pos & PAGE_CACHE_MASK, page, page_cache);
- else if (reada_ok && filp->f_ramax > MIN_READAHEAD)
- filp->f_ramax = MIN_READAHEAD;
-
- wait_on_page(page);
-
- if (!PageUptodate(page))
- goto page_read_error;
+ get_page(page);
+ spin_unlock(&pagecache_lock);
-success:
- /*
- * Ok, we have the page, it's up-to-date and ok,
- * so now we can finally copy it to user space...
- */
+ if (!Page_Uptodate(page))
+ goto page_not_up_to_date;
+page_ok:
+ /*
+ * Ok, we have the page, and it's up-to-date, so
+ * now we can copy it to user space...
+ */
{
unsigned long offset, nr;
break;
}
+/*
+ * Ok, the page was not immediately readable, so let's try to read
+ * ahead while we're at it..
+ */
+page_not_up_to_date:
+ page_cache = generic_file_readahead(reada_ok, filp, inode, pos & PAGE_CACHE_MASK, page, page_cache);
+
+ if (Page_Uptodate(page))
+ goto page_ok;
+
+ /* Get exclusive access to the page ... */
+ lock_page(page);
+ if (Page_Uptodate(page)) {
+ UnlockPage(page);
+ goto page_ok;
+ }
+
+read_page:
+ /* ... and start the actual read. The read will unlock the page. */
+ lock_kernel();
+ error = inode->i_op->readpage(filp, page);
+ unlock_kernel();
+
+ if (!error) {
+ if (Page_Uptodate(page))
+ goto page_ok;
+
+ /* Again, try some read-ahead while waiting for the page to finish.. */
+ page_cache = generic_file_readahead(reada_ok, filp, inode, pos & PAGE_CACHE_MASK, page, page_cache);
+ wait_on_page(page);
+ if (Page_Uptodate(page))
+ goto page_ok;
+ error = -EIO;
+ }
+
+ /* UHHUH! A synchronous read error occurred. Report it */
+ desc->error = error;
+ page_cache_release(page);
+ break;
+
no_cached_page:
/*
* Ok, it wasn't cached, so we need to create a new
* page..
+ *
+ * We get here with the page cache lock held.
*/
if (!page_cache) {
+ spin_unlock(&pagecache_lock);
page_cache = page_cache_alloc();
+ if (!page_cache) {
+ desc->error = -ENOMEM;
+ break;
+ }
+
/*
- * That could have slept, so go around to the
- * very beginning..
+ * Somebody may have added the page while we
+ * dropped the page cache lock. Check for that.
*/
- if (page_cache)
- continue;
- desc->error = -ENOMEM;
- break;
+ spin_lock(&pagecache_lock);
+ page = __find_page_nolock(inode, pos & PAGE_CACHE_MASK, *hash);
+ if (page)
+ goto found_page;
}
/*
* Ok, add the new page to the hash-queues...
*/
page = page_cache_entry(page_cache);
- page_cache = 0;
- add_to_page_cache(page, inode, pos & PAGE_CACHE_MASK, hash);
-
- /*
- * Error handling is tricky. If we get a read error,
- * the cached page stays in the cache (but uptodate=0),
- * and the next process that accesses it will try to
- * re-read it. This is needed for NFS etc, where the
- * identity of the reader can decide if we can read the
- * page or not..
- */
-/*
- * We have to read the page.
- * If we were reading ahead, we had previously tried to read this page,
- * That means that the page has probably been removed from the cache before
- * the application process needs it, or has been rewritten.
- * Decrease max readahead size to the minimum value in that situation.
- */
- if (reada_ok && filp->f_ramax > MIN_READAHEAD)
- filp->f_ramax = MIN_READAHEAD;
-
- {
- int error = inode->i_op->readpage(filp, page);
- if (!error)
- goto found_page;
- desc->error = error;
- page_cache_release(page);
- break;
- }
+ __add_to_page_cache(page, inode, pos & PAGE_CACHE_MASK, hash);
+ spin_unlock(&pagecache_lock);
-page_read_error:
- /*
- * We found the page, but it wasn't up-to-date.
- * Try to re-read it _once_. We do this synchronously,
- * because this happens only if there were errors.
- */
- {
- int error = inode->i_op->readpage(filp, page);
- if (!error) {
- wait_on_page(page);
- if (PageUptodate(page) && !PageError(page))
- goto success;
- error = -EIO; /* Some unspecified error occurred.. */
- }
- desc->error = error;
- page_cache_release(page);
- break;
- }
+ page_cache = 0;
+ goto read_page;
}
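+	/*
+	 * The read loop above is now label-driven: page_ok copies data
+	 * out to user space, page_not_up_to_date does opportunistic
+	 * read-ahead and takes the page lock, read_page starts the real
+	 * I/O via ->readpage(), and no_cached_page allocates and hashes
+	 * a fresh page before jumping back to read_page.  All hash
+	 * walks happen under the pagecache_lock spinlock.
+	 */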
*ppos = pos;
{
ssize_t retval;
+ unlock_kernel();
retval = -EFAULT;
if (access_ok(VERIFY_WRITE, buf, count)) {
retval = 0;
retval = desc.error;
}
}
+ lock_kernel();
return retval;
}
unsigned long offset, reada, i;
struct page * page, **hash;
unsigned long old_page, new_page;
+ int error;
new_page = 0;
offset = (address & PAGE_MASK) - area->vm_start + area->vm_offset;
if (offset >= inode->i_size && (area->vm_flags & VM_SHARED) && area->vm_mm == current->mm)
- goto no_page;
+ goto no_page_nolock;
+ unlock_kernel();
/*
* Do we have something in the page cache already?
*/
hash = page_hash(inode, offset);
- page = __find_page(inode, offset, *hash);
+retry_find:
+ page = __find_get_page(inode, offset, *hash);
if (!page)
goto no_cached_page;
goto failure;
}
- if (PageLocked(page))
- goto page_locked_wait;
- if (!PageUptodate(page))
- goto page_read_error;
+ wait_on_page(page);
+
+ if (!Page_Uptodate(page))
+ PAGE_BUG(page);
success:
/*
- * Found the page, need to check sharing and possibly
- * copy it over to another page..
+ * Found the page and have a reference on it, need to check sharing
+ * and possibly copy it over to another page..
*/
old_page = page_address(page);
if (!no_share) {
page_cache_free(new_page);
flush_page_to_ram(old_page);
+ lock_kernel();
return old_page;
}
copy_page(new_page, old_page);
flush_page_to_ram(new_page);
page_cache_release(page);
+ lock_kernel();
return new_page;
no_cached_page:
* cache.. The page we just got may be useful if we
* can't share, so don't get rid of it here.
*/
- page = find_page(inode, offset);
+ page = __find_get_page(inode, offset, *hash);
if (page)
goto found_page;
* Now, create a new page-cache page from the page we got
*/
page = page_cache_entry(new_page);
- new_page = 0;
- add_to_page_cache(page, inode, offset, hash);
+ if (add_to_page_cache_unique(page, inode, offset, hash))
+ goto retry_find;
- if (inode->i_op->readpage(file, page) != 0)
- goto failure;
+ /*
+ * Now it's ours and locked, we can do initial IO to it:
+ */
+ new_page = 0;
- goto found_page;
+ lock_kernel();
+ error = inode->i_op->readpage(file, page);
+ unlock_kernel();
-page_locked_wait:
- __wait_on_page(page);
- if (PageUptodate(page))
+ if (!error) {
+ wait_on_page(page);
+ if (PageError(page))
+ goto page_read_error;
goto success;
-
+ }
+
page_read_error:
/*
* Umm, take care of errors if the page isn't up-to-date.
* because there really aren't any performance issues here
* and we need to check for errors.
*/
- if (inode->i_op->readpage(file, page) != 0)
+ if (!PageLocked(page))
+ PAGE_BUG(page);
+ ClearPageError(page);
+ lock_kernel();
+ error = inode->i_op->readpage(file, page);
+ unlock_kernel();
+ if (error)
goto failure;
wait_on_page(page);
- if (PageError(page))
- goto failure;
- if (PageUptodate(page))
+ if (Page_Uptodate(page))
goto success;
/*
if (new_page)
page_cache_free(new_page);
no_page:
+ lock_kernel();
+no_page_nolock:
return 0;
}
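+/*
+ * filemap_nopage() now drops the big kernel lock around all of its
+ * page cache work and retakes it on every exit path - hence the paired
+ * no_page (lock retaken) and no_page_nolock (straight out) labels.
+ */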
* if the disk is full.
*/
static inline int do_write_page(struct inode * inode, struct file * file,
- const char * page, unsigned long offset)
+ const char * page_addr, unsigned long offset)
{
int retval;
unsigned long size;
loff_t loff = offset;
mm_segment_t old_fs;
+ int (*writepage) (struct file *, struct page *);
+ struct page * page;
size = offset + PAGE_SIZE;
/* refuse to extend file size.. */
old_fs = get_fs();
set_fs(KERNEL_DS);
retval = -EIO;
- if (size == file->f_op->write(file, (const char *) page, size, &loff))
+ writepage = inode->i_op->writepage;
+ page = mem_map + MAP_NR(page_addr);
+repeat:
+ wait_on_page(page);
+ if (TryLockPage(page))
+ goto repeat;
+ if (writepage) {
+ retval = writepage(file, page);
+ } else {
+ if (size == file->f_op->write(file, page_addr, size, &loff))
retval = 0;
+ }
+ UnlockPage(page);
set_fs(old_fs);
return retval;
}
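+/*
+ * Note the ordering above: the page is kept locked for the duration of
+ * the write so that writepage() sees a stable page, and the old
+ * f_op->write() path with KERNEL_DS survives only as a fallback for
+ * filesystems that do not provide i_op->writepage() yet.
+ */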
unsigned long address, unsigned int flags)
{
pte_t pte = *ptep;
- unsigned long page;
+ unsigned long pageaddr;
+ struct page *page;
int error;
if (!(flags & MS_INVALIDATE)) {
flush_cache_page(vma, address);
set_pte(ptep, pte_mkclean(pte));
flush_tlb_page(vma, address);
- page = pte_page(pte);
- atomic_inc(&page_cache_entry(page)->count);
+ pageaddr = pte_page(pte);
+ page = page_cache_entry(pageaddr);
+ get_page(page);
} else {
if (pte_none(pte))
return 0;
swap_free(pte_val(pte));
return 0;
}
- page = pte_page(pte);
+ pageaddr = pte_page(pte);
if (!pte_dirty(pte) || flags == MS_INVALIDATE) {
- page_cache_free(page);
+ page_cache_free(pageaddr);
return 0;
}
}
- error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page, 1);
- page_cache_free(page);
+ error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, pageaddr, 1);
+ page_cache_free(pageaddr);
return error;
}
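+/*
+ * The rename above is purely for clarity: "pageaddr" is the virtual
+ * address (an unsigned long, as returned by pte_page()), while "page"
+ * is the corresponding struct page descriptor from page_cache_entry().
+ */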
unsigned long page_cache = 0;
unsigned long written;
long status;
+ int err;
- if (file->f_error) {
- int error = file->f_error;
+ err = file->f_error;
+ if (err) {
file->f_error = 0;
- return error;
+ goto out;
}
written = 0;
/*
* Check whether we've reached the file size limit.
*/
- status = -EFBIG;
+ err = -EFBIG;
if (pos >= limit) {
send_sig(SIGXFSZ, current, 0);
goto out;
count = limit - pos;
}
+ unlock_kernel();
+
while (count) {
unsigned long bytes, pgpos, offset;
/*
bytes = count;
hash = page_hash(inode, pgpos);
- page = __find_page(inode, pgpos, *hash);
+repeat_find:
+ page = __find_lock_page(inode, pgpos, *hash);
if (!page) {
if (!page_cache) {
page_cache = page_cache_alloc();
if (page_cache)
- continue;
+ goto repeat_find;
status = -ENOMEM;
break;
}
page = page_cache_entry(page_cache);
- add_to_page_cache(page, inode, pgpos, hash);
+ if (add_to_page_cache_unique(page,inode,pgpos,hash))
+ goto repeat_find;
+
page_cache = 0;
}
- /* Get exclusive IO access to the page.. */
- wait_on_page(page);
- set_bit(PG_locked, &page->flags);
+ /* We have exclusive IO access to the page.. */
+ if (!PageLocked(page)) {
+ PAGE_BUG(page);
+ } else {
+ if (page->owner != (int)current) {
+ PAGE_BUG(page);
+ }
+ }
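+	/*
+	 * page->owner is a debug field, presumably set to (int)current
+	 * by whoever locked the page; the int cast truncates the task
+	 * pointer on 64-bit targets, so treat this as a heuristic
+	 * sanity check only.
+	 */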
status = write_one_page(file, page, offset, bytes, buf);
/* Mark it unlocked again and drop the page.. */
- clear_bit(PG_locked, &page->flags);
- wake_up(&page->wait);
+ UnlockPage(page);
page_cache_release(page);
if (status < 0)
if (page_cache)
page_cache_free(page_cache);
+
+ err = written ? written : status;
+ lock_kernel();
out:
- return written ? written : status;
+ return err;
}
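+/*
+ * generic_file_write() now runs its copy loop without the big kernel
+ * lock - unlock_kernel() before the while (count) loop, lock_kernel()
+ * again on the way out.  The pagecache_lock spinlock and the page lock
+ * taken by __find_lock_page() provide the real serialization.
+ */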
/*
- * Support routines for directory cacheing using the page cache.
- */
-
-/*
- * Finds the page at the specified offset, installing a new page
- * if requested. The count is incremented and the page is locked.
- *
- * Note: we don't have to worry about races here, as the caller
- * is holding the inode semaphore.
+ * Support routines for directory caching using the page cache.
*/
-unsigned long get_cached_page(struct inode * inode, unsigned long offset,
- int new)
-{
- struct page * page;
- struct page ** hash;
- unsigned long page_cache = 0;
-
- hash = page_hash(inode, offset);
- page = __find_page(inode, offset, *hash);
- if (!page) {
- if (!new)
- goto out;
- page_cache = page_cache_alloc();
- if (!page_cache)
- goto out;
- clear_page(page_cache);
- page = page_cache_entry(page_cache);
- add_to_page_cache(page, inode, offset, hash);
- }
- if (atomic_read(&page->count) != 2)
- printk(KERN_ERR "get_cached_page: page count=%d\n",
- atomic_read(&page->count));
- if (test_bit(PG_locked, &page->flags))
- printk(KERN_ERR "get_cached_page: page already locked!\n");
- set_bit(PG_locked, &page->flags);
- page_cache = page_address(page);
-
-out:
- return page_cache;
-}
/*
* Unlock and free a page.
{
struct page * page = page_cache_entry(addr);
- if (!test_bit(PG_locked, &page->flags))
- printk("put_cached_page: page not locked!\n");
- if (atomic_read(&page->count) != 2)
- printk("put_cached_page: page count=%d\n",
- atomic_read(&page->count));
- clear_bit(PG_locked, &page->flags);
- wake_up(&page->wait);
+ UnlockPage(page);
+ if (page_count(page) != 2)
+ panic("put_cached_page: page count=%d\n",
+ page_count(page));
page_cache_release(page);
}
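+/*
+ * The page_count(page) != 2 test encodes the expected state at this
+ * point: one reference held by the page cache hash and one by the
+ * caller.  Anything else indicates a refcounting bug, hence the (new,
+ * rather drastic) panic().
+ */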
static inline void make_pio_request(struct file *file,
unsigned long offset,
- unsigned long page)
+ unsigned long pageaddr)
{
struct pio_request *p;
+ struct page *page;
- atomic_inc(&page_cache_entry(page)->count);
+ page = page_cache_entry(pageaddr);
+ get_page(page);
/*
* We need to allocate without causing any recursive IO in the
p->file = file;
p->offset = offset;
- p->page = page;
+ p->page = pageaddr;
put_pio_request(p);
wake_up(&pio_wait);
if (vma->vm_flags & VM_SHARED)
pte = pte_mkclean(pte);
set_pte(dst_pte, pte_mkold(pte));
- atomic_inc(&mem_map[page_nr].count);
+ get_page(mem_map + page_nr);
cont_copy_pte_range: address += PAGE_SIZE;
if (address >= end)
if (MAP_NR(page) >= max_mapnr)
printk("put_dirty_page: trying to put page %08lx at %08lx\n",page,address);
- if (atomic_read(&mem_map[MAP_NR(page)].count) != 1)
+ if (page_count(mem_map + MAP_NR(page)) != 1)
printk("mem_map disagrees with %08lx at %08lx\n",page,address);
pgd = pgd_offset(tsk->mm,address);
pmd = pmd_alloc(pgd, address);
unsigned long address, pte_t *page_table, pte_t pte)
{
unsigned long old_page, new_page;
- struct page * page_map;
+ struct page * page;
new_page = __get_free_page(GFP_USER);
- /* Did swap_out() unmapped the protected page while we slept? */
+ /* Did swap_out() unmap the protected page while we slept? */
if (pte_val(*page_table) != pte_val(pte))
goto end_wp_page;
old_page = pte_page(pte);
if (MAP_NR(old_page) >= max_mapnr)
goto bad_wp_page;
tsk->min_flt++;
- page_map = mem_map + MAP_NR(old_page);
+ page = mem_map + MAP_NR(old_page);
/*
* We can avoid the copy if:
* in which case we can remove the page
* from the swap cache.
*/
- switch (atomic_read(&page_map->count)) {
+ switch (page_count(page)) {
case 2:
- if (!PageSwapCache(page_map))
+ if (!PageSwapCache(page))
break;
- if (swap_count(page_map->offset) != 1)
+ if (swap_count(page->offset) != 1)
break;
- delete_from_swap_cache(page_map);
+ delete_from_swap_cache(page);
/* FallThrough */
case 1:
flush_cache_page(vma, address);
if (!new_page)
goto no_new_page;
- if (PageReserved(page_map))
+ if (PageReserved(page))
++vma->vm_mm->rss;
copy_cow_page(old_page,new_page);
flush_page_to_ram(old_page);
set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot))));
flush_tlb_page(vma, address);
unlock_kernel();
- __free_page(page_map);
+ __free_page(page);
return 1;
bad_wp_page:
if (pte_val(*page_table) != pte_val(entry)) {
free_page(pte_page(page));
} else {
- if (atomic_read(&mem_map[MAP_NR(pte_page(page))].count) > 1 &&
+ if (page_count(mem_map + MAP_NR(pte_page(page))) > 1 &&
!(vma->vm_flags & VM_SHARED))
page = pte_wrprotect(page);
++vma->vm_mm->rss;
entry = mk_pte(page, vma->vm_page_prot);
if (write_access) {
entry = pte_mkwrite(pte_mkdirty(entry));
- } else if (atomic_read(&mem_map[MAP_NR(page)].count) > 1 &&
+ } else if (page_count(mem_map+MAP_NR(page)) > 1 &&
!(vma->vm_flags & VM_SHARED))
entry = pte_wrprotect(entry);
set_pte(page_table, entry);
return 1;
free = buffermem >> PAGE_SHIFT;
- free += page_cache_size;
+ free += atomic_read(&page_cache_size);
free += nr_free_pages;
free += nr_swap_pages;
free -= (page_cache.min_percent + buffer_mem.min_percent + 2)*num_physpages/100;
struct vm_area_struct * vma;
unsigned long flags, retval;
+ len = PAGE_ALIGN(len);
+ if (!len)
+ return addr;
+
/*
* mlock MCL_FUTURE?
*/
void __free_page(struct page *page)
{
- if (!PageReserved(page) && atomic_dec_and_test(&page->count)) {
+ if (!PageReserved(page) && put_page_testzero(page)) {
if (PageSwapCache(page))
- panic ("Freeing swap cache page");
+ PAGE_BUG(page);
page->flags &= ~(1 << PG_referenced);
free_pages_ok(page - mem_map, 0);
return;
mem_map_t * map = mem_map + map_nr;
if (PageReserved(map))
return;
- if (atomic_dec_and_test(&map->count)) {
+ if (put_page_testzero(map)) {
if (PageSwapCache(map))
- panic ("Freeing swap cache pages");
+ PAGE_BUG(map);
map->flags &= ~(1 << PG_referenced);
free_pages_ok(map_nr, order);
return;
MARK_USED(map_nr, new_order, area); \
nr_free_pages -= 1 << order; \
EXPAND(ret, map_nr, order, new_order, area); \
- spin_unlock_irqrestore(&page_alloc_lock, flags); \
+ spin_unlock_irqrestore(&page_alloc_lock,flags);\
return ADDRESS(map_nr); \
} \
prev = ret; \
index += size; \
map += size; \
} \
- atomic_set(&map->count, 1); \
+ set_page_count(map, 1); \
} while (0)
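+/*
+ * Throughout this patch, raw atomic_*() operations on page->count are
+ * replaced by accessor macros, roughly (from include/linux/mm.h):
+ *
+ *	#define get_page(p)		atomic_inc(&(p)->count)
+ *	#define put_page_testzero(p)	atomic_dec_and_test(&(p)->count)
+ *	#define page_count(p)		atomic_read(&(p)->count)
+ *	#define set_page_count(p,v)	atomic_set(&(p)->count, v)
+ *
+ * so that the representation of the count can change later without
+ * touching every call site again.
+ */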
int low_on_memory = 0;
memset(mem_map, 0, start_mem - (unsigned long) mem_map);
do {
--p;
- atomic_set(&p->count, 0);
+ set_page_count(p, 0);
p->flags = (1 << PG_DMA) | (1 << PG_reserved);
init_waitqueue_head(&p->wait);
} while (p > mem_map);
#ifdef DEBUG_SWAP
printk ("DebugVM: %s_swap_page entry %08lx, page %p (count %d), %s\n",
(rw == READ) ? "read" : "write",
- entry, (char *) page_address(page), atomic_read(&page->count),
+ entry, (char *) page_address(page), page_count(page),
wait ? "wait" : "nowait");
#endif
}
}
if (rw == READ) {
- clear_bit(PG_uptodate, &page->flags);
+ ClearPageUptodate(page);
kstat.pswpin++;
} else
kstat.pswpout++;
- atomic_inc(&page->count);
+ get_page(page);
if (p->swap_device) {
zones[0] = offset;
zones_used = 1;
printk("swap_after_unlock_page: lock already cleared\n");
wake_up(&lock_queue);
}
- atomic_dec(&page->count);
+ put_page(page);
return;
}
if (!wait) {
/* block_size == PAGE_SIZE/zones_used */
brw_page(rw, page, dev, zones, block_size, 0);
+
+	if (rw == WRITE)	/* HACK, FIXME */
+ UnlockPage(page);
/* Note! For consistency we do all of the logic,
* decrementing the page count, and unlocking the page in the
* swap lock map - in the IO completion handler.
*/
- if (!wait)
+ if (!wait) {
return;
+ }
wait_on_page(page);
/* This shouldn't happen, but check to be sure. */
- if (atomic_read(&page->count) == 0)
+ if (page_count(page) == 0)
printk(KERN_ERR "rw_swap_page: page unused while waiting!\n");
#ifdef DEBUG_SWAP
printk ("DebugVM: %s_swap_page finished on page %p (count %d)\n",
(rw == READ) ? "read" : "write",
- (char *) page_adddress(page),
- atomic_read(&page->count));
+ (char *) page_address(page),
+ page_count(page));
#endif
}
struct page *page = mem_map + MAP_NR(buf);
if (page->inode && page->inode != &swapper_inode)
- panic ("Tried to swap a non-swapper page");
+ PAGE_BUG(page);
/*
* Make sure that we have a swap cache association for this
struct page *page;
page = mem_map + MAP_NR((unsigned long) buffer);
- wait_on_page(page);
- set_bit(PG_locked, &page->flags);
- if (test_and_set_bit(PG_swap_cache, &page->flags)) {
- printk ("VM: read_swap_page: page already in swap cache!\n");
- return;
- }
- if (page->inode) {
- printk ("VM: read_swap_page: page already in page cache!\n");
- return;
- }
+
+ if (TryLockPage(page))
+ PAGE_BUG(page);
+ if (test_and_set_bit(PG_swap_cache, &page->flags))
+ PAGE_BUG(page);
+ if (page->inode)
+ PAGE_BUG(page);
+ get_page(page); /* Protect from shrink_mmap() */
page->inode = &swapper_inode;
page->offset = entry;
- atomic_inc(&page->count); /* Protect from shrink_mmap() */
rw_swap_page(rw, entry, buffer, 1);
- atomic_dec(&page->count);
- page->inode = 0;
- clear_bit(PG_swap_cache, &page->flags);
+
+ /*
+ * and now remove it from the pagecache ...
+ */
+ if (TryLockPage(page))
+ PAGE_BUG(page);
+ PageClearSwapCache(page);
+ remove_inode_page(page);
+ page_cache_release(page);
+ UnlockPage(page);
}
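+/*
+ * The rewritten path above makes the swap-cache state of the page
+ * explicit: lock it, mark it PG_swap_cache with page->inode pointing
+ * at swapper_inode, do the I/O, then re-lock and unwind through
+ * remove_inode_page() and page_cache_release().  Conditions the old
+ * code merely printk()ed about are now hard PAGE_BUG()s.
+ */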
/*
* ensure that any mistaken dereferences of this structure cause a
* kernel oops.
*/
-struct inode swapper_inode;
+
+static struct inode_operations swapper_inode_operations = {
+ NULL, /* default file operations */
+ NULL, /* create */
+ NULL, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ NULL, /* readpage */
+ NULL, /* writepage */
+ NULL, /* bmap */
+ NULL, /* truncate */
+ NULL, /* permission */
+ NULL, /* smap */
+ NULL, /* updatepage */
+ NULL, /* revalidate */
+ generic_block_flushpage, /* flushpage */
+};
+
+struct inode swapper_inode = { i_op: &swapper_inode_operations };
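+/*
+ * "i_op:" is the old GCC labeled-element initializer (the pre-C99
+ * spelling of a designated initializer); every other field of
+ * swapper_inode starts out zeroed.  The operations table itself only
+ * supplies flushpage - all the other entries are NULL placeholders.
+ */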
#ifdef SWAP_CACHE_INFO
unsigned long swap_cache_add_total = 0;
#endif
#ifdef DEBUG_SWAP
printk("DebugVM: add_to_swap_cache(%08lx count %d, entry %08lx)\n",
- page_address(page), atomic_read(&page->count), entry);
+ page_address(page), page_count(page), entry);
#endif
if (PageTestandSetSwapCache(page)) {
printk(KERN_ERR "swap_cache: replacing non-empty entry %08lx "
- "on page %08lx\n",
- page->offset, page_address(page));
+ "on page %08lx\n",
+ page->offset, page_address(page));
return 0;
}
if (page->inode) {
printk(KERN_ERR "swap_cache: replacing page-cached entry "
- "on page %08lx\n", page_address(page));
+ "on page %08lx\n", page_address(page));
return 0;
}
- atomic_inc(&page->count);
+ get_page(page);
page->inode = &swapper_inode;
page->offset = entry;
add_page_to_hash_queue(page, &swapper_inode, entry);
result = 1;
#ifdef DEBUG_SWAP
printk("DebugVM: swap_duplicate(entry %08lx, count now %d)\n",
- entry, p->swap_map[offset]);
+ entry, p->swap_map[offset]);
#endif
out:
return result;
bad_unused:
printk(KERN_ERR
"swap_duplicate at %8p: entry %08lx, unused page\n",
- __builtin_return_address(0), entry);
+ __builtin_return_address(0), entry);
goto out;
}
retval = p->swap_map[offset];
#ifdef DEBUG_SWAP
printk("DebugVM: swap_count(entry %08lx, count %d)\n",
- entry, retval);
+ entry, retval);
#endif
out:
return retval;
goto out;
bad_file:
printk(KERN_ERR
- "swap_count: entry %08lx, nonexistent swap file!\n", entry);
+ "swap_count: entry %08lx, nonexistent swap file!\n", entry);
goto out;
bad_offset:
printk(KERN_ERR
- "swap_count: entry %08lx, offset exceeds max!\n", entry);
+ "swap_count: entry %08lx, offset exceeds max!\n", entry);
goto out;
bad_unused:
printk(KERN_ERR
- "swap_count at %8p: entry %08lx, unused page!\n",
- __builtin_return_address(0), entry);
+ "swap_count at %8p: entry %08lx, unused page!\n",
+ __builtin_return_address(0), entry);
goto out;
}
#ifdef DEBUG_SWAP
printk("DebugVM: remove_from_swap_cache(%08lx count %d)\n",
- page_address(page), atomic_read(&page->count));
+ page_address(page), page_count(page));
#endif
- PageClearSwapCache (page);
+ PageClearSwapCache(page);
remove_inode_page(page);
+ page_cache_release(page);
}
{
long entry = page->offset;
+ LockPage(page);
+
#ifdef SWAP_CACHE_INFO
swap_cache_del_total++;
#endif
#ifdef DEBUG_SWAP
printk("DebugVM: delete_from_swap_cache(%08lx count %d, "
- "entry %08lx)\n",
- page_address(page), atomic_read(&page->count), entry);
+ "entry %08lx)\n",
+ page_address(page), page_count(page), entry);
#endif
remove_from_swap_cache (page);
swap_free (entry);
+ UnlockPage(page);
}
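+/*
+ * delete_from_swap_cache() now holds the page lock across the removal
+ * and the swap_free(), so paths that honour PG_locked can no longer
+ * see the page in a half-removed state.
+ */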
/*
/*
- * Lookup a swap entry in the swap cache. We need to be careful about
- * locked pages. A found page will be returned with its refcount
- * incremented.
+ * Lookup a swap entry in the swap cache. A found page will be returned
+ * unlocked and with its refcount incremented - we rely on the big
+ * kernel lock to keep page table operations atomic even though we
+ * drop the page lock before returning.
*/
struct page * lookup_swap_cache(unsigned long entry)
swap_cache_find_total++;
#endif
while (1) {
- found = find_page(&swapper_inode, entry);
+ found = find_lock_page(&swapper_inode, entry);
if (!found)
return 0;
if (found->inode != &swapper_inode || !PageSwapCache(found))
goto out_bad;
- if (!PageLocked(found)) {
#ifdef SWAP_CACHE_INFO
- swap_cache_find_success++;
+ swap_cache_find_success++;
#endif
- return found;
- }
- __free_page(found);
- __wait_on_page(found);
+ UnlockPage(found);
+ return found;
}
out_bad:
printk (KERN_ERR "VM: Found a non-swapper swap page!\n");
+ UnlockPage(found);
__free_page(found);
return 0;
}
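+/*
+ * find_lock_page() is assumed to be the hash-computing wrapper around
+ * __find_lock_page() (mirroring the find_page()/__find_page() pair),
+ * so a non-NULL result arrives both referenced and locked - hence the
+ * explicit UnlockPage() on every exit path above.
+ */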
#ifdef DEBUG_SWAP
printk("DebugVM: read_swap_cache_async entry %08lx%s\n",
- entry, wait ? ", wait" : "");
+ entry, wait ? ", wait" : "");
#endif
/*
* Make sure the swap entry is still in use.
if (!add_to_swap_cache(new_page, entry))
goto out_free_page;
- set_bit(PG_locked, &new_page->flags);
+ LockPage(new_page);
rw_swap_page(READ, entry, (char *) new_page_addr, wait);
#ifdef DEBUG_SWAP
printk("DebugVM: read_swap_cache_async created "
- "entry %08lx at %p\n",
- entry, (char *) page_address(new_page));
+ "entry %08lx at %p\n",
+ entry, (char *) page_address(new_page));
#endif
return new_page;
out:
return found_page;
}
+
return;
set_pte(dir, pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
swap_free(entry);
- atomic_inc(&mem_map[MAP_NR(page)].count);
+ get_page(mem_map + MAP_NR(page));
++vma->vm_mm->rss;
}
add_to_swap_cache(page_map, entry);
/* We checked we were unlocked way up above, and we
have been careful not to stall until here */
- set_bit(PG_locked, &page_map->flags);
+ LockPage(page_map);
/* OK, do a physical asynchronous write to swap. */
rw_swap_page(WRITE, entry, (char *) page, 0);
EXPORT_SYMBOL(rtnetlink_links);
EXPORT_SYMBOL(__rta_fill);
EXPORT_SYMBOL(rtnetlink_dump_ifinfo);
-EXPORT_SYMBOL(rtnl_wlockct);
EXPORT_SYMBOL(rtnl);
EXPORT_SYMBOL(neigh_delete);
EXPORT_SYMBOL(neigh_add);