From cbf5d468756d4db31bdd5c79b37538a8495222d6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 23 Nov 2007 15:25:38 -0500 Subject: [PATCH] Linux-2.3.7.. Let's be careful out there.. The new and much improved fully page-cache based filesystem code is now apparently stable, and works wonderfully well performancewise. We fixed all known issues with the IO subsystem: it scales well in SMP, and it avoids unnecessary copies and unnecessary temporary buffers for write-out. The shared mapping code in particular is much cleaner and also a _lot_ faster. In short, it's perfect. And we want as many people as possible out there testing out the new cool code, and bask in the success stories.. HOWEVER. _Just_ in case something goes wrong [ extremely unlikely of course. Sure. Sue me ], we want to indemnify ourselves. There just might be a bug hiding there somewhere, and it might eat your filesystem while laughing in glee over you being naive and testing new code. So you have been warned. In particular, there's some indication that it might have problems on sparc still (and/or other architectures), possibly due to the ext2fs byte order cleanups that have also been done in order to reach the afore-mentioned state of perfection. I'd be especially interested in people running databases on top of Linux: Solid server in particular is very fsync-happy, and that's one of the operations that have been speeded up by orders of magnitude. 
Linus --- arch/i386/kernel/mca.c | 1 - drivers/pci/pci.c | 24 ++- drivers/usb/acm.c | 2 +- drivers/usb/audio.c | 2 +- drivers/usb/cpia.c | 2 +- drivers/usb/hub.c | 2 +- drivers/usb/keyboard.c | 2 +- drivers/usb/mouse.c | 2 +- drivers/usb/ohci-hcd.c | 2 +- drivers/usb/ohci.c | 4 +- drivers/usb/uhci-debug.c | 2 +- drivers/usb/uhci.c | 39 ++-- drivers/usb/usb.h | 4 +- drivers/usb/usb_scsi.c | 402 +++++++++++++++++++++++++++++------ drivers/usb/usb_scsi_debug.c | 2 +- fs/affs/dir.c | 1 - fs/affs/file.c | 2 - fs/autofs/dir.c | 1 - fs/autofs/root.c | 1 - fs/autofs/symlink.c | 1 - fs/bad_inode.c | 4 +- fs/block_dev.c | 1 + fs/buffer.c | 273 +++++++++++++++--------- fs/devices.c | 7 +- fs/devpts/root.c | 1 - fs/ext2/dir.c | 6 +- fs/ext2/file.c | 18 +- fs/ext2/inode.c | 62 +++--- fs/ext2/symlink.c | 6 +- fs/fifo.c | 7 +- fs/hfs/dir_nat.c | 2 - fs/hfs/file.c | 1 - fs/hfs/file_cap.c | 1 - fs/hfs/file_hdr.c | 1 - fs/hpfs/inode.c | 3 - fs/isofs/file.c | 5 +- fs/isofs/inode.c | 12 +- fs/minix/file.c | 3 +- fs/msdos/namei.c | 1 - fs/ncpfs/dir.c | 1 - fs/nfs/dir.c | 4 +- fs/nfs/file.c | 5 +- fs/nfs/read.c | 3 + fs/nfs/symlink.c | 7 +- fs/nfs/write.c | 3 + fs/ntfs/fs.c | 3 - fs/pipe.c | 7 +- fs/proc/array.c | 14 +- fs/proc/base.c | 7 +- fs/proc/fd.c | 7 +- fs/proc/generic.c | 38 ++-- fs/proc/kmsg.c | 7 +- fs/proc/link.c | 7 +- fs/proc/mem.c | 7 +- fs/proc/net.c | 7 +- fs/proc/omirr.c | 38 ++-- fs/proc/proc_devtree.c | 6 +- fs/proc/root.c | 42 ++-- fs/proc/scsi.c | 37 ++-- fs/proc/sysvipc.c | 37 ++-- fs/smbfs/dir.c | 1 - fs/smbfs/file.c | 1 - fs/sysv/file.c | 3 +- fs/ufs/file.c | 3 +- fs/umsdos/dir.c | 1 - fs/umsdos/rdir.c | 1 - fs/umsdos/symlink.c | 1 - include/linux/fs.h | 56 +++-- include/linux/swap.h | 1 + kernel/ksyms.c | 1 - kernel/sysctl.c | 7 +- mm/filemap.c | 162 ++++++++------ mm/swap_state.c | 29 ++- 73 files changed, 989 insertions(+), 477 deletions(-) diff --git a/arch/i386/kernel/mca.c b/arch/i386/kernel/mca.c index 9c6948c600ad..ae1980a42b92 100644 --- 
a/arch/i386/kernel/mca.c +++ b/arch/i386/kernel/mca.c @@ -148,7 +148,6 @@ static struct inode_operations proc_mca_inode_operations = { NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL /* revalidate */ }; #endif diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index bd385ccf8e2b..9f5ecd98ddf9 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -47,13 +47,23 @@ pci_find_slot(unsigned int bus, unsigned int devfn) struct pci_dev * pci_find_device(unsigned int vendor, unsigned int device, struct pci_dev *from) { - if (!from) - from = pci_devices; - else - from = from->next; - while (from && (from->vendor != vendor && vendor != PCI_ANY_ID || from->device != device && device != PCI_ANY_ID)) - from = from->next; - return from; + struct pci_dev *next; + + next = pci_devices; + if (from) + next = from->next; + + while (next) { + struct pci_dev *dev = next; + next = next->next; + if (vendor != PCI_ANY_ID && dev->vendor != vendor) + continue; + if (device != PCI_ANY_ID && dev->device != device) + continue; + + return dev; + } + return NULL; } diff --git a/drivers/usb/acm.c b/drivers/usb/acm.c index d4796e28c30b..10c837d5a28e 100644 --- a/drivers/usb/acm.c +++ b/drivers/usb/acm.c @@ -50,7 +50,7 @@ static struct acm_state static_acm_state; spinlock_t usb_acm_lock = SPIN_LOCK_UNLOCKED; -static int acm_irq(int state, void *__buffer, void *dev_id) +static int acm_irq(int state, void *__buffer, int len, void *dev_id) { // unsigned char *data = __buffer; struct acm_state *acm = &static_acm_state; diff --git a/drivers/usb/audio.c b/drivers/usb/audio.c index 45a276772152..9743ec89e4e2 100644 --- a/drivers/usb/audio.c +++ b/drivers/usb/audio.c @@ -27,7 +27,7 @@ static struct usb_driver usb_audio_driver = }; -static int usb_audio_irq(int state, void *buffer, void *dev_id) +static int usb_audio_irq(int state, void *buffer, int len, void *dev_id) { struct usb_audio *aud = (struct usb_audio*) dev_id; return 1; diff --git a/drivers/usb/cpia.c 
b/drivers/usb/cpia.c index 87e1e4254cb8..2402d3425ea3 100644 --- a/drivers/usb/cpia.c +++ b/drivers/usb/cpia.c @@ -451,7 +451,7 @@ printk("copying\n"); } } -static int cpia_isoc_irq(int status, void *__buffer, void *dev_id) +static int cpia_isoc_irq(int status, void *__buffer, int len, void *dev_id) { struct usb_cpia *cpia = dev_id; struct usb_device *dev = cpia->dev; diff --git a/drivers/usb/hub.c b/drivers/usb/hub.c index 1cd7d7ccbd91..0a1ec1f011b2 100644 --- a/drivers/usb/hub.c +++ b/drivers/usb/hub.c @@ -33,7 +33,7 @@ static int khubd_pid = 0; * the low-level driver that it wants to be re-activated, * or zero to say "I'm done". */ -static int hub_irq(int status, void *__buffer, void *dev_id) +static int hub_irq(int status, void *__buffer, int len, void *dev_id) { struct usb_hub *hub = dev_id; unsigned long flags; diff --git a/drivers/usb/keyboard.c b/drivers/usb/keyboard.c index 5d93a5a84b25..e87519d9f52b 100644 --- a/drivers/usb/keyboard.c +++ b/drivers/usb/keyboard.c @@ -92,7 +92,7 @@ usb_kbd_repeat(unsigned long dev_id) } static int -usb_kbd_irq(int state, void *buffer, void *dev_id) +usb_kbd_irq(int state, void *buffer, int len, void *dev_id) { struct usb_keyboard *kbd = (struct usb_keyboard*) dev_id; unsigned long *down = (unsigned long*) buffer; diff --git a/drivers/usb/mouse.c b/drivers/usb/mouse.c index f094c0b0dd76..a79c10a070ec 100644 --- a/drivers/usb/mouse.c +++ b/drivers/usb/mouse.c @@ -60,7 +60,7 @@ static struct mouse_state static_mouse_state; spinlock_t usb_mouse_lock = SPIN_LOCK_UNLOCKED; -static int mouse_irq(int state, void *__buffer, void *dev_id) +static int mouse_irq(int state, void *__buffer, int len, void *dev_id) { signed char *data = __buffer; /* finding the mouse is easy when there's only one */ diff --git a/drivers/usb/ohci-hcd.c b/drivers/usb/ohci-hcd.c index 820efc5dcb49..8db61e08e6aa 100644 --- a/drivers/usb/ohci-hcd.c +++ b/drivers/usb/ohci-hcd.c @@ -102,7 +102,7 @@ static int sohci_int_handler(void * ohci_in, unsigned int 
ep_addr, int ctrl_len, OHCI_DEBUG( for(i=0; i < data_len; i++ ) printk(" %02x", ((__u8 *) data)[i]);) OHCI_DEBUG( printk(" ret_status: %x\n", status); }) - ret = handler(cc_to_status[status & 0xf], data, dev_id); + ret = handler(cc_to_status[status & 0xf], data, data_len, dev_id); if(ret == 0) return 0; /* 0 .. do not requeue */ if(status > 0) return -1; /* error occured do not requeue ? */ ohci_trans_req(ohci, ep_addr, 0, NULL, data, 8, (__OHCI_BAG) handler, (__OHCI_BAG) dev_id); /* requeue int request */ diff --git a/drivers/usb/ohci.c b/drivers/usb/ohci.c index 22b46a3969cc..48191e11be26 100644 --- a/drivers/usb/ohci.c +++ b/drivers/usb/ohci.c @@ -777,7 +777,7 @@ static DECLARE_WAIT_QUEUE_HEAD(control_wakeup); * * This function is called from the interrupt handler. */ -static int ohci_control_completed(int stats, void *buffer, void *dev_id) +static int ohci_control_completed(int stats, void *buffer, int len, void *dev_id) { /* pass the TDs completion status back to control_msg */ if (dev_id) { @@ -1456,7 +1456,7 @@ static void ohci_reap_donelist(struct ohci *ohci) /* Check if TD should be re-queued */ if ((td->completed != NULL) && - (td->completed(cc, td->data, td->dev_id))) { + (td->completed(cc, td->data, -1 /* XXX */, td->dev_id))) { /* Mark the TD as active again: * Set the not accessed condition code * Reset the Error count diff --git a/drivers/usb/uhci-debug.c b/drivers/usb/uhci-debug.c index 7c577a58f7b9..32549763e7d4 100644 --- a/drivers/usb/uhci-debug.c +++ b/drivers/usb/uhci-debug.c @@ -131,7 +131,7 @@ void show_queue(struct uhci_qh *qh) #if 0 printk(" link = %p, element = %p\n", qh->link, qh->element); #endif - if(!qh->element) { + if(!(qh->element & ~0xF)) { printk(" td 0 = NULL\n"); return; } diff --git a/drivers/usb/uhci.c b/drivers/usb/uhci.c index 2f8010ed1c90..c03ce5adf5b0 100644 --- a/drivers/usb/uhci.c +++ b/drivers/usb/uhci.c @@ -126,7 +126,7 @@ static int uhci_td_result(struct uhci_device *dev, struct uhci_td *td, unsigned tmp = td->first; 
printk("uhci_td_result() failed with status %x\n", status); - show_status(dev->uhci); + //show_status(dev->uhci); do { show_td(tmp); if ((tmp->link & 1) || (tmp->link & 2)) @@ -422,7 +422,7 @@ static int uhci_remove_irq(struct usb_device *usb_dev, unsigned int pipe, usb_de /* notify removal */ - td->completed(USB_ST_REMOVED, NULL, td->dev_id); + td->completed(USB_ST_REMOVED, NULL, 0, td->dev_id); /* this is DANGEROUS - not sure whether this is right */ @@ -645,7 +645,7 @@ void uhci_delete_isochronous(struct usb_device *usb_dev, void *_isodesc) */ static DECLARE_WAIT_QUEUE_HEAD(control_wakeup); -static int uhci_control_completed(int status, void *buffer, void *dev_id) +static int uhci_control_completed(int status, void *buffer, int len, void *dev_id) { wake_up(&control_wakeup); return 0; /* Don't re-instate */ @@ -692,7 +692,7 @@ static int uhci_run_control(struct uhci_device *dev, struct uhci_td *first, stru // show_status(dev->uhci); // show_queues(dev->uhci); - schedule_timeout(HZ/10); + schedule_timeout(HZ*5); // control should be empty here... // show_status(dev->uhci); @@ -736,8 +736,7 @@ static int uhci_run_control(struct uhci_device *dev, struct uhci_td *first, stru * information, that's just ridiculously high. Most * control messages have just a few bytes of data. 
*/ -static int uhci_control_msg(struct usb_device *usb_dev, unsigned int pipe, - devrequest *cmd, void *data, int len) +static int uhci_control_msg(struct usb_device *usb_dev, unsigned int pipe, void *cmd, void *data, int len) { struct uhci_device *dev = usb_to_uhci(usb_dev); struct uhci_td *first, *td, *prevtd; @@ -805,17 +804,18 @@ static int uhci_control_msg(struct usb_device *usb_dev, unsigned int pipe, } /* - * Build the final TD for control status + * Build the final TD for control status */ destination ^= (0xE1 ^ 0x69); /* OUT -> IN */ destination |= 1 << 19; /* End in Data1 */ - td->link = 1; /* Terminate */ - td->status = status | (1 << 24); /* IOC */ + td->backptr = &prevtd->link; + td->status = (status /* & ~(3 << 27) */) | (1 << 24); /* no limit on final packet */ td->info = destination | (0x7ff << 21); /* 0 bytes of data */ td->buffer = 0; td->first = first; - td->backptr = &prevtd->link; + td->link = 1; /* Terminate */ + /* Start it up.. */ ret = uhci_run_control(dev, first, td); @@ -841,7 +841,7 @@ static int uhci_control_msg(struct usb_device *usb_dev, unsigned int pipe, } if (uhci_debug && ret) { - __u8 *p = (__u8 *) cmd; + __u8 *p = cmd; printk("Failed cmd - %02X %02X %02X %02X %02X %02X %02X %02X\n", p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7]); @@ -860,7 +860,7 @@ static int uhci_control_msg(struct usb_device *usb_dev, unsigned int pipe, */ static DECLARE_WAIT_QUEUE_HEAD(bulk_wakeup); -static int uhci_bulk_completed(int status, void *buffer, void *dev_id) +static int uhci_bulk_completed(int status, void *buffer, int len, void *dev_id) { wake_up(&bulk_wakeup); return 0; /* Don't re-instate */ @@ -908,10 +908,11 @@ static int uhci_run_bulk(struct uhci_device *dev, struct uhci_td *first, struct // show_status(dev->uhci); // show_queues(dev->uhci); - schedule_timeout(HZ/10); + schedule_timeout(HZ*5); // show_status(dev->uhci); // show_queues(dev->uhci); + //show_queue(first->qh); remove_wait_queue(&bulk_wakeup, &wait); /* Clean up in case it 
failed.. */ @@ -1243,6 +1244,7 @@ static void uhci_interrupt_notify(struct uhci *uhci) { struct list_head *head = &uhci->interrupt_list; struct list_head *tmp; + int status; spin_lock(&irqlist_lock); tmp = head->next; @@ -1252,12 +1254,14 @@ static void uhci_interrupt_notify(struct uhci *uhci) next = tmp->next; - if (!(td->status & (1 << 23))) { /* No longer active? */ + if (!((status = td->status) & (1 << 23)) || /* No longer active? */ + ((td->qh->element & ~15) && + !((status = uhci_link_to_td(td->qh->element)->status) & (1 <<23)) && + (status & 0x760000) /* is in error state (Stall, db, babble, timeout, bitstuff) */)) { /* remove from IRQ list */ __list_del(tmp->prev, next); INIT_LIST_HEAD(tmp); - if (td->completed(uhci_map_status((td->status & 0xff)>> 16, 0), - bus_to_virt(td->buffer), td->dev_id)) { + if (td->completed(uhci_map_status(status, 0), bus_to_virt(td->buffer), -1, td->dev_id)) { list_add(&td->irq_list, &uhci->interrupt_list); if (!(td->status & (1 << 25))) { @@ -1284,7 +1288,7 @@ static void uhci_interrupt_notify(struct uhci *uhci) /* If completed wants to not reactivate, then it's */ /* responsible for free'ing the TD's and QH's */ /* or another function (such as run_control) */ - } + } tmp = next; } spin_unlock(&irqlist_lock); @@ -1564,6 +1568,7 @@ static int uhci_control_thread(void * __uhci) { struct uhci *uhci = (struct uhci *)__uhci; struct uhci_device * root_hub =usb_to_uhci(uhci->bus->root_hub); + lock_kernel(); request_region(uhci->io_addr, 32, "usb-uhci"); diff --git a/drivers/usb/usb.h b/drivers/usb/usb.h index 63ebeffb93c9..a6bf78e4af25 100644 --- a/drivers/usb/usb.h +++ b/drivers/usb/usb.h @@ -242,10 +242,12 @@ struct usb_driver { * until we come up with a common meaning. * void *buffer - This is a pointer to the data used in this * USB transfer. + * int length - This is the number of bytes transferred in or out + * of the buffer by this transfer. 
(-1 = unknown/unsupported) * void *dev_id - This is a user defined pointer set when the IRQ * is requested that is passed back. */ -typedef int (*usb_device_irq)(int, void *, void *); +typedef int (*usb_device_irq)(int, void *, int, void *); struct usb_operations { struct usb_device *(*allocate)(struct usb_device *); diff --git a/drivers/usb/usb_scsi.c b/drivers/usb/usb_scsi.c index 655045bea98f..1a3e16b257ee 100644 --- a/drivers/usb/usb_scsi.c +++ b/drivers/usb/usb_scsi.c @@ -74,7 +74,9 @@ struct us_data { __u8 ep_int; /* interrupt . */ __u8 subclass; /* as in overview */ __u8 protocol; /* .............. */ + __u8 attention_done; /* force attention on first command */ int (*pop)(Scsi_Cmnd *); /* protocol specific do cmd */ + int (*pop_reset)(struct us_data *); /* ................. device reset */ GUID(guid); /* unique dev id */ struct Scsi_Host *host; /* our dummy host data */ Scsi_Host_Template *htmplt; /* own host template */ @@ -142,6 +144,9 @@ static int us_one_transfer(struct us_data *us, int pipe, char *buf, int length) /* we want to retry if the device reported NAK */ if (result == USB_ST_TIMEOUT) { + if (partial != this_xfer) { + return 0; /* I do not like this */ + } if (!maxtry--) break; this_xfer -= partial; @@ -150,6 +155,11 @@ static int us_one_transfer(struct us_data *us, int pipe, char *buf, int length) /* short data - assume end */ result = USB_ST_DATAUNDERRUN; break; + } else if (result == USB_ST_STALL && us->protocol == US_PR_CB) { + if (!maxtry--) + break; + this_xfer -= partial; + buf += partial; } else break; } while ( this_xfer ); @@ -216,27 +226,57 @@ static unsigned int us_transfer_length(Scsi_Cmnd *srb) } -static int pop_CBI_irq(int state, void *buffer, void *dev_id) +static int pop_CBI_irq(int state, void *buffer, int len, void *dev_id) { struct us_data *us = (struct us_data *)dev_id; if (state != USB_ST_REMOVED) { us->ip_data = *(__u16 *)buffer; - us->ip_wanted = 0; + US_DEBUGP("Interrupt Status %x\n", us->ip_data); } - 
wake_up(&us->ip_waitq); + if (us->ip_wanted) + wake_up(&us->ip_waitq); + us->ip_wanted = 0; /* we dont want another interrupt */ return 0; } + +static int pop_CB_reset(struct us_data *us) +{ + unsigned char cmd[12]; + devrequest dr; + int result; + + dr.requesttype = USB_TYPE_CLASS | USB_RT_INTERFACE; + dr.request = US_CBI_ADSC; + dr.value = 0; + dr.index = us->pusb_dev->ifnum; + dr.length = 12; + memset(cmd, -1, sizeof(cmd)); + cmd[0] = SEND_DIAGNOSTIC; + cmd[1] = 4; + us->pusb_dev->bus->op->control_msg(us->pusb_dev, + usb_sndctrlpipe(us->pusb_dev,0), + &dr, cmd, 12); + + usb_clear_halt(us->pusb_dev, us->ep_in | 0x80); + usb_clear_halt(us->pusb_dev, us->ep_out); + + /* long wait for reset */ + + schedule_timeout(HZ*5); + return 0; +} + static int pop_CB_command(Scsi_Cmnd *srb) { struct us_data *us = (struct us_data *)srb->host_scribble; devrequest dr; unsigned char cmd[16]; int result; - int retry = 1; + int retry = 5; int done_start = 0; while (retry--) { @@ -279,7 +319,8 @@ static int pop_CB_command(Scsi_Cmnd *srb) result = us->pusb_dev->bus->op->control_msg(us->pusb_dev, usb_sndctrlpipe(us->pusb_dev,0), &dr, cmd, us->fixedlength); - if (!done_start && us->subclass == US_SC_UFI && cmd[0] == TEST_UNIT_READY && result) { + if (!done_start && (us->subclass == US_SC_UFI /*|| us->subclass == US_SC_8070*/) + && cmd[0] == TEST_UNIT_READY && result) { /* as per spec try a start command, wait and retry */ done_start++; @@ -302,35 +343,47 @@ static int pop_CB_command(Scsi_Cmnd *srb) return result; } -/* Protocol command handlers */ +/* + * Control/Bulk status handler + */ -static int pop_CBI(Scsi_Cmnd *srb) +static int pop_CB_status(Scsi_Cmnd *srb) { struct us_data *us = (struct us_data *)srb->host_scribble; int result; + __u8 status[2]; + devrequest dr; + int retry = 5; - /* run the command */ - - if ((result = pop_CB_command(srb))) { - US_DEBUGP("CBI command %x\n", result); - if (result == USB_ST_STALL || result == USB_ST_TIMEOUT) - return (DID_OK << 16) | 2; - return 
DID_ABORT << 16; - } - - /* transfer the data */ - - if (us_transfer_length(srb)) { - result = us_transfer(srb, US_DIRECTION(srb->cmnd[0])); - if (result && result != USB_ST_DATAUNDERRUN) { - US_DEBUGP("CBI transfer %x\n", result); + switch (us->protocol) { + case US_PR_CB: + /* get from control */ + + while (retry--) { + dr.requesttype = 0x80 | USB_TYPE_STANDARD | USB_RT_DEVICE; + dr.request = USB_REQ_GET_STATUS; + dr.index = 0; + dr.value = 0; + dr.length = 2; + result = us->pusb_dev->bus->op->control_msg(us->pusb_dev, + usb_rcvctrlpipe(us->pusb_dev,0), + &dr, status, sizeof(status)); + if (result != USB_ST_TIMEOUT) + break; + } + if (result) { + US_DEBUGP("Bad AP status request %d\n", result); return DID_ABORT << 16; } - } - - /* get status */ + US_DEBUGP("Got AP status %x %x\n", status[0], status[1]); + if (srb->cmnd[0] != REQUEST_SENSE && srb->cmnd[0] != INQUIRY && + ( (status[0] & ~3) || status[1])) + return (DID_OK << 16) | 2; + else + return DID_OK << 16; + break; - if (us->protocol == US_PR_CBI) { + case US_PR_CBI: /* get from interrupt pipe */ /* add interrupt transfer, marked for removal */ @@ -367,12 +420,48 @@ static int pop_CBI(Scsi_Cmnd *srb) return DID_ABORT << 16; } return (DID_OK << 16) + ((us->ip_data & 0x300) ? 2 : 0); - } else { - /* get from where? 
*/ } return DID_ERROR << 16; } +/* Protocol command handlers */ + +static int pop_CBI(Scsi_Cmnd *srb) +{ + struct us_data *us = (struct us_data *)srb->host_scribble; + int result; + + /* run the command */ + + if ((result = pop_CB_command(srb))) { + US_DEBUGP("CBI command %x\n", result); + if (result == USB_ST_STALL || result == USB_ST_TIMEOUT) { + return (DID_OK << 16) | 2; + } + return DID_ABORT << 16; + } + + /* transfer the data */ + + if (us_transfer_length(srb)) { + result = us_transfer(srb, US_DIRECTION(srb->cmnd[0])); + if (result && result != USB_ST_DATAUNDERRUN) { + US_DEBUGP("CBI transfer %x\n", result); + return DID_ABORT << 16; + } else if (result == USB_ST_DATAUNDERRUN) { + return DID_OK << 16; + } + } else { + if (!result) { + return DID_OK << 16; + } + } + + /* get status */ + + return pop_CB_status(srb); +} + static int pop_Bulk_reset(struct us_data *us) { devrequest dr; @@ -380,21 +469,20 @@ static int pop_Bulk_reset(struct us_data *us) dr.requesttype = USB_TYPE_CLASS | USB_RT_INTERFACE; dr.request = US_BULK_RESET; - dr.value = US_BULK_RESET_SOFT; + dr.value = US_BULK_RESET_HARD; dr.index = 0; dr.length = 0; - US_DEBUGP("Bulk soft reset\n"); result = us->pusb_dev->bus->op->control_msg(us->pusb_dev, usb_sndctrlpipe(us->pusb_dev,0), &dr, NULL, 0); - if (result) { - US_DEBUGP("Bulk soft reset failed %d\n", result); - dr.value = US_BULK_RESET_HARD; - result = us->pusb_dev->bus->op->control_msg(us->pusb_dev, usb_sndctrlpipe(us->pusb_dev,0), &dr, NULL, 0); - if (result) - US_DEBUGP("Bulk hard reset failed %d\n", result); - } + if (result) + US_DEBUGP("Bulk hard reset failed %d\n", result); usb_clear_halt(us->pusb_dev, us->ep_in | 0x80); usb_clear_halt(us->pusb_dev, us->ep_out); + + /* long wait for reset */ + + schedule_timeout(HZ*5); + return result; } /* @@ -453,8 +541,6 @@ static int pop_Bulk(Scsi_Cmnd *srb) stall = 0; do { - //usb_settoggle(us->pusb_dev, us->ep_in, 0); /* AAARgh!! 
*/ - US_DEBUGP("Toggle is %d\n", usb_gettoggle(us->pusb_dev, us->ep_in)); result = us->pusb_dev->bus->op->bulk_msg(us->pusb_dev, usb_rcvbulkpipe(us->pusb_dev, us->ep_in), &bcs, US_BULK_CS_WRAP_LEN, &partial); @@ -564,6 +650,9 @@ static int us_queuecommand( Scsi_Cmnd *srb , void (*done)(Scsi_Cmnd *)) struct us_data *us = (struct us_data *)srb->host->hostdata[0]; US_DEBUGP("Command wakeup\n"); + if (us->srb) { + /* busy */ + } srb->host_scribble = (unsigned char *)us; us->srb = srb; srb->scsi_done = done; @@ -581,9 +670,12 @@ static int us_abort( Scsi_Cmnd *srb ) return 0; } -static int us_device_reset( Scsi_Cmnd *srb ) +static int us_bus_reset( Scsi_Cmnd *srb ) { - return 0; + struct us_data *us = (struct us_data *)srb->host->hostdata[0]; + + us->pop_reset(us); + return SUCCESS; } static int us_host_reset( Scsi_Cmnd *srb ) @@ -591,10 +683,6 @@ static int us_host_reset( Scsi_Cmnd *srb ) return 0; } -static int us_bus_reset( Scsi_Cmnd *srb ) -{ - return 0; -} #undef SPRINTF #define SPRINTF(args...) 
{ if (pos < (buffer + length)) pos += sprintf (pos, ## args); } @@ -623,9 +711,9 @@ int usb_scsi_proc_info (char *buffer, char **start, off_t offset, int length, in if (inout) return length; - if (!(vendor = usb_string(us->pusb_dev, us->pusb_dev->descriptor.iManufacturer))) + if (!us->pusb_dev || !(vendor = usb_string(us->pusb_dev, us->pusb_dev->descriptor.iManufacturer))) vendor = "?"; - if (!(product = usb_string(us->pusb_dev, us->pusb_dev->descriptor.iProduct))) + if (!us->pusb_dev || !(product = usb_string(us->pusb_dev, us->pusb_dev->descriptor.iProduct))) product = "?"; switch (us->protocol) { @@ -677,7 +765,7 @@ static Scsi_Host_Template my_host_template = { us_queuecommand, NULL, /* eh_strategy */ us_abort, - us_device_reset, + us_bus_reset, us_bus_reset, us_host_reset, NULL, /* abort */ @@ -695,6 +783,25 @@ static Scsi_Host_Template my_host_template = { TRUE /* emulated */ }; +static unsigned char sense_notready[] = { + 0x70, /* current error */ + 0x00, + 0x02, /* not ready */ + 0x00, + 0x00, + 10, /* additional length */ + 0x00, + 0x00, + 0x00, + 0x00, + 0x04, /* not ready */ + 0x03, /* manual intervention */ + 0x00, + 0x00, + 0x00, + 0x00 +}; + static int usbscsi_control_thread(void * __us) { struct us_data *us = (struct us_data *)__us; @@ -710,7 +817,7 @@ static int usbscsi_control_thread(void * __us) exit_files(current); //exit_fs(current); - sprintf(current->comm, "usbscsi%d", us->host_no); + sprintf(current->comm, "usbscsi%d", us->host_number); unlock_kernel(); @@ -727,18 +834,160 @@ static int usbscsi_control_thread(void * __us) switch (action) { case US_ACT_COMMAND : - if (!us->pusb_dev || us->srb->target || us->srb->lun) { + if (us->srb->target || us->srb->lun) { /* bad device */ US_DEBUGP( "Bad device number (%d/%d) or dev %x\n", us->srb->target, us->srb->lun, (unsigned int)us->pusb_dev); us->srb->result = DID_BAD_TARGET << 16; + } else if (!us->pusb_dev) { + + /* our device has gone - pretend not ready */ + + if (us->srb->cmnd[0] == 
REQUEST_SENSE) { + memcpy(us->srb->request_buffer, sense_notready, sizeof(sense_notready)); + us->srb->result = DID_OK << 16; + } else { + us->srb->result = (DID_OK << 16) | 2; + } } else { US_DEBUG(us_show_command(us->srb)); + + /* check for variable length - do properly if so */ + if (us->filter && us->filter->command) us->srb->result = us->filter->command(us->fdata, us->srb); - else + else if (us->srb->cmnd[0] == START_STOP && + us->pusb_dev->descriptor.idProduct == 0x0001 && + us->pusb_dev->descriptor.idVendor == 0x04e6) + us->srb->result = DID_OK << 16; + else { + unsigned int savelen = us->srb->request_bufflen; + unsigned int saveallocation; + + switch (us->srb->cmnd[0]) { + case REQUEST_SENSE: + if (us->srb->request_bufflen > 18) + us->srb->request_bufflen = 18; + else + break; + saveallocation = us->srb->cmnd[4]; + us->srb->cmnd[4] = 18; + break; + + case INQUIRY: + if (us->srb->request_bufflen > 36) + us->srb->request_bufflen = 36; + else + break; + saveallocation = us->srb->cmnd[4]; + us->srb->cmnd[4] = 36; + break; + + case MODE_SENSE: + if (us->srb->request_bufflen > 4) + us->srb->request_bufflen = 4; + else + break; + saveallocation = us->srb->cmnd[4]; + us->srb->cmnd[4] = 4; + break; + + case LOG_SENSE: + case MODE_SENSE_10: + if (us->srb->request_bufflen > 8) + us->srb->request_bufflen = 8; + else + break; + saveallocation = (us->srb->cmnd[7] << 8) | us->srb->cmnd[8]; + us->srb->cmnd[7] = 0; + us->srb->cmnd[8] = 8; + break; + + default: + break; + } us->srb->result = us->pop(us->srb); + + if (savelen != us->srb->request_bufflen && + us->srb->result == (DID_OK << 16)) { + unsigned char *p = (unsigned char *)us->srb->request_buffer; + unsigned int length; + + /* set correct length and retry */ + switch (us->srb->cmnd[0]) { + case REQUEST_SENSE: + /* simply return 18 bytes */ + p[7] = 10; + length = us->srb->request_bufflen;; + break; + + case INQUIRY: + length = p[4] + 5 > savelen ? 
savelen : p[4] + 5; + us->srb->cmnd[4] = length; + break; + + case MODE_SENSE: + length = p[0] + 4 > savelen ? savelen : p[0] + 4; + us->srb->cmnd[4] = 4; + break; + + case LOG_SENSE: + length = ((p[2] << 8) + p[3]) + 4 > savelen ? savelen : ((p[2] << 8) + p[3]) + 4; + us->srb->cmnd[7] = length >> 8; + us->srb->cmnd[8] = length; + break; + + case MODE_SENSE_10: + length = ((p[0] << 8) + p[1]) + 8 > savelen ? savelen : ((p[0] << 8) + p[1]) + 8; + us->srb->cmnd[7] = length >> 8; + us->srb->cmnd[8] = length; + break; + } + + US_DEBUGP("Old/New length = %d/%d\n", savelen, length); + + if (us->srb->request_bufflen != length) { + us->srb->request_bufflen = length; + us->srb->result = us->pop(us->srb); + } + /* reset back to original values */ + + us->srb->request_bufflen = savelen; + switch (us->srb->cmnd[0]) { + case REQUEST_SENSE: + case INQUIRY: + case MODE_SENSE: + us->srb->cmnd[4] = saveallocation; + break; + + case LOG_SENSE: + case MODE_SENSE_10: + us->srb->cmnd[7] = saveallocation >> 8; + us->srb->cmnd[8] = saveallocation; + break; + } + } + /* force attention on first command */ + if (!us->attention_done) { + if (us->srb->cmnd[0] == REQUEST_SENSE) { + if (us->srb->result == (DID_OK << 16)) { + unsigned char *p = (unsigned char *)us->srb->request_buffer; + + us->attention_done = 1; + if ((p[2] & 0x0f) != UNIT_ATTENTION) { + p[2] = UNIT_ATTENTION; + p[12] = 0x29; /* power on, reset or bus-reset */ + p[13] = 0; + } + } + } else if (us->srb->cmnd[0] != INQUIRY && + us->srb->result == (DID_OK << 16)) { + us->srb->result |= 2; /* force check condition */ + } + } + } } us->srb->scsi_done(us->srb); + us->srb = NULL; break; case US_ACT_ABORT : @@ -820,7 +1069,7 @@ static int scsi_probe(struct usb_device *dev) if (dev->descriptor.idVendor == 0x04e6 && dev->descriptor.idProduct == 0x0001) { /* shuttle E-USB */ - protocol = US_PR_ZIP; + protocol = US_PR_CB; subclass = US_SC_8070; /* an assumption */ } else if (dev->descriptor.bDeviceClass != 0 || 
dev->config->altsetting->interface->bInterfaceClass != 8 || @@ -835,11 +1084,15 @@ static int scsi_probe(struct usb_device *dev) usb_string(dev, dev->descriptor.iSerialNumber) ) { make_guid(guid, dev->descriptor.idVendor, dev->descriptor.idProduct, usb_string(dev, dev->descriptor.iSerialNumber)); - for (ss = us_list; ss; ss = ss->next) { - if (GUID_EQUAL(guid, ss->guid)) { - US_DEBUGP("Found existing GUID " GUID_FORMAT "\n", GUID_ARGS(guid)); - break; - } + } else { + make_guid(guid, dev->descriptor.idVendor, dev->descriptor.idProduct, + "0"); + } + for (ss = us_list; ss; ss = ss->next) { + if (!ss->pusb_dev && GUID_EQUAL(guid, ss->guid)) { + US_DEBUGP("Found existing GUID " GUID_FORMAT "\n", GUID_ARGS(guid)); + flags = ss->flags; + break; } } } @@ -865,6 +1118,7 @@ static int scsi_probe(struct usb_device *dev) ss->subclass = interface->bInterfaceSubClass; ss->protocol = interface->bInterfaceProtocol; } + ss->attention_done = 0; /* set the protocol op */ @@ -873,16 +1127,19 @@ static int scsi_probe(struct usb_device *dev) case US_PR_CB: US_DEBUGPX("Control/Bulk\n"); ss->pop = pop_CBI; + ss->pop_reset = pop_CB_reset; break; case US_PR_CBI: US_DEBUGPX("Control/Bulk/Interrupt\n"); ss->pop = pop_CBI; + ss->pop_reset = pop_CB_reset; break; default: US_DEBUGPX("Bulk\n"); ss->pop = pop_Bulk; + ss->pop_reset = pop_Bulk_reset; break; } @@ -907,6 +1164,7 @@ static int scsi_probe(struct usb_device *dev) /* exit if strange looking */ if (usb_set_configuration(dev, dev->config[0].bConfigurationValue) || + usb_set_interface(dev, interface->bInterfaceNumber, 0) || !ss->ep_in || !ss->ep_out || (ss->protocol == US_PR_CBI && ss->ep_int == 0)) { US_DEBUGP("Problems with device\n"); if (ss->host) { @@ -933,13 +1191,8 @@ static int scsi_probe(struct usb_device *dev) /* make unique id if possible */ - if (dev->descriptor.iSerialNumber && - usb_string(dev, dev->descriptor.iSerialNumber) ) { - make_guid(ss->guid, dev->descriptor.idVendor, dev->descriptor.idProduct, - usb_string(dev, 
dev->descriptor.iSerialNumber)); - } - US_DEBUGP("New GUID " GUID_FORMAT "\n", GUID_ARGS(guid)); + memcpy(ss->guid, guid, sizeof(guid)); /* set class specific stuff */ @@ -986,9 +1239,30 @@ static int scsi_probe(struct usb_device *dev) (struct us_data *)htmplt->proc_dir = ss; - if (ss->protocol == US_PR_CBI) + + if (dev->descriptor.idVendor == 0x04e6 && + dev->descriptor.idProduct == 0x0001) { + devrequest dr; + __u8 qstat[2]; + + /* shuttle E-USB */ + dr.requesttype = 0xC0; + dr.request = 1; + dr.index = 0; + dr.value = 0; + dr.length = 0; + ss->pusb_dev->bus->op->control_msg(ss->pusb_dev, usb_rcvctrlpipe(dev,0), &dr, qstat, 2); + US_DEBUGP("C0 status %x %x\n", qstat[0], qstat[1]); + init_waitqueue_head(&ss->ip_waitq); + ss->pusb_dev->bus->op->request_irq(ss->pusb_dev, + usb_rcvctrlpipe(ss->pusb_dev, ss->ep_int), + pop_CBI_irq, 0, (void *)ss); + interruptible_sleep_on_timeout(&ss->ip_waitq, HZ*5); + + } else if (ss->protocol == US_PR_CBI) init_waitqueue_head(&ss->ip_waitq); + /* start up our thread */ { diff --git a/drivers/usb/usb_scsi_debug.c b/drivers/usb/usb_scsi_debug.c index 2ca847c08bf8..634f4c0f6e5f 100644 --- a/drivers/usb/usb_scsi_debug.c +++ b/drivers/usb/usb_scsi_debug.c @@ -95,7 +95,7 @@ void us_show_command(Scsi_Cmnd *srb) case READ_ELEMENT_STATUS: what = "READ_ELEMENT_STATUS"; break; case SEND_VOLUME_TAG: what = "SEND_VOLUME_TAG"; break; case WRITE_LONG_2: what = "WRITE_LONG_2"; break; - default: what = "??"; break; + default: break; } printk(KERN_DEBUG USB_SCSI "Command %s (%d bytes)\n", what, srb->cmd_len); printk(KERN_DEBUG USB_SCSI " %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x\n", diff --git a/fs/affs/dir.c b/fs/affs/dir.c index 3a1c78ef0901..ee08ff451990 100644 --- a/fs/affs/dir.c +++ b/fs/affs/dir.c @@ -63,7 +63,6 @@ struct inode_operations affs_dir_inode_operations = { NULL, /* truncate */ NULL, /* permissions */ NULL, /* smap */ - NULL, /* updatepage */ NULL /* revalidate */ }; diff --git a/fs/affs/file.c b/fs/affs/file.c index 
1961b4ec3493..bb1ce69c85d4 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -80,7 +80,6 @@ struct inode_operations affs_file_inode_operations = { affs_truncate, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL /* revalidate */ }; @@ -121,7 +120,6 @@ struct inode_operations affs_file_inode_operations_ofs = { affs_truncate, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL /* revalidate */ }; diff --git a/fs/autofs/dir.c b/fs/autofs/dir.c index 425df6577342..f6ccf8419977 100644 --- a/fs/autofs/dir.c +++ b/fs/autofs/dir.c @@ -79,7 +79,6 @@ struct inode_operations autofs_dir_inode_operations = { NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL /* revalidate */ }; diff --git a/fs/autofs/root.c b/fs/autofs/root.c index c1b57ec6e969..011e3286f956 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c @@ -60,7 +60,6 @@ struct inode_operations autofs_root_inode_operations = { NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL /* revalidate */ }; diff --git a/fs/autofs/symlink.c b/fs/autofs/symlink.c index a4cb5154f5a0..0e46db365480 100644 --- a/fs/autofs/symlink.c +++ b/fs/autofs/symlink.c @@ -55,6 +55,5 @@ struct inode_operations autofs_symlink_inode_operations = { NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL /* revalidate */ }; diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 33560caa4b94..89711607bcc8 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -60,13 +60,13 @@ struct inode_operations bad_inode_ops = EIO_ERROR, /* rename */ EIO_ERROR, /* readlink */ bad_follow_link, /* follow_link */ + EIO_ERROR, /* bmap */ EIO_ERROR, /* readpage */ EIO_ERROR, /* writepage */ - EIO_ERROR, /* bmap */ + EIO_ERROR, /* flushpage */ EIO_ERROR, /* truncate */ EIO_ERROR, /* permission */ EIO_ERROR, /* smap */ - EIO_ERROR, /* update_page */ EIO_ERROR /* revalidate */ }; diff --git 
a/fs/block_dev.c b/fs/block_dev.c index 13b3f534debc..664522ab848d 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -124,6 +124,7 @@ ssize_t block_write(struct file * filp, const char * buf, } buffercount=0; } + balance_dirty(dev); if(write_error) break; } diff --git a/fs/buffer.c b/fs/buffer.c index b6474f45183d..75f6486a016a 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -24,6 +24,8 @@ * - RMK */ +#include +#include #include #include #include @@ -771,25 +773,44 @@ static void file_buffer(struct buffer_head *bh, int list) /* * if a new dirty buffer is created we need to balance bdflush. + * + * in the future we might want to make bdflush aware of different + * pressures on different devices - thus the (currently unused) + * 'dev' parameter. */ -static inline void balance_dirty (kdev_t dev) +void balance_dirty(kdev_t dev) { - int too_many = (nr_buffers * bdf_prm.b_un.nfract/100); - - /* This buffer is dirty, maybe we need to start flushing. - * If too high a percentage of the buffers are dirty... - */ - if (nr_buffers_type[BUF_DIRTY] > too_many) { - wakeup_bdflush(1); + int dirty = nr_buffers_type[BUF_DIRTY]; + int ndirty = bdf_prm.b_un.ndirty; + + if (dirty > ndirty) { + int wait = 0; + if (dirty > 2*ndirty) + wait = 1; + wakeup_bdflush(wait); } +} - /* If this is a loop device, and - * more than half of the buffers are dirty... - * (Prevents no-free-buffers deadlock with loop device.) 
- */ - if (MAJOR(dev) == LOOP_MAJOR && - nr_buffers_type[BUF_DIRTY]*2>nr_buffers) - wakeup_bdflush(1); +atomic_t too_many_dirty_buffers; + +static inline void __mark_dirty(struct buffer_head *bh, int flag) +{ + set_writetime(bh, flag); + refile_buffer(bh); + if (atomic_read(&too_many_dirty_buffers)) + balance_dirty(bh->b_dev); +} + +void __mark_buffer_dirty(struct buffer_head *bh, int flag) +{ + __mark_dirty(bh, flag); +} + +void __atomic_mark_buffer_dirty(struct buffer_head *bh, int flag) +{ + lock_kernel(); + __mark_dirty(bh, flag); + unlock_kernel(); } /* @@ -800,21 +821,19 @@ void refile_buffer(struct buffer_head * buf) { int dispose; - if(buf->b_dev == B_FREE) { + if (buf->b_dev == B_FREE) { printk("Attempt to refile free buffer\n"); return; } + + dispose = BUF_CLEAN; + if (buffer_locked(buf)) + dispose = BUF_LOCKED; if (buffer_dirty(buf)) dispose = BUF_DIRTY; - else if (buffer_locked(buf)) - dispose = BUF_LOCKED; - else - dispose = BUF_CLEAN; - if(dispose != buf->b_list) { + + if (dispose != buf->b_list) file_buffer(buf, dispose); - if (dispose == BUF_DIRTY) - balance_dirty(buf->b_dev); - } } /* @@ -1239,7 +1258,9 @@ static int create_page_buffers (int rw, struct page *page, kdev_t dev, int b[], * They show up in the buffer hash table and are registered in * page->buffers. */ + lock_kernel(); head = create_buffers(page_address(page), size, 1); + unlock_kernel(); if (page->buffers) BUG(); if (!head) @@ -1275,7 +1296,7 @@ static int create_page_buffers (int rw, struct page *page, kdev_t dev, int b[], * we have truncated the file and are going to free the * blocks on-disk.. 
*/ -int generic_block_flushpage(struct inode *inode, struct page *page, unsigned long offset) +int block_flushpage(struct inode *inode, struct page *page, unsigned long offset) { struct buffer_head *head, *bh, *next; unsigned int curr_off = 0; @@ -1284,6 +1305,7 @@ int generic_block_flushpage(struct inode *inode, struct page *page, unsigned lon BUG(); if (!page->buffers) return 0; + lock_kernel(); head = page->buffers; bh = head; @@ -1311,21 +1333,25 @@ int generic_block_flushpage(struct inode *inode, struct page *page, unsigned lon /* * subtle. We release buffer-heads only if this is - * the 'final' flushpage. We invalidate the bmap - * cached value in all cases. + * the 'final' flushpage. We have invalidated the bmap + * cached value unconditionally, so real IO is not + * possible anymore. */ if (!offset) try_to_free_buffers(page); + unlock_kernel(); return 0; } -static inline void create_empty_buffers (struct page *page, +static void create_empty_buffers (struct page *page, struct inode *inode, unsigned long blocksize) { struct buffer_head *bh, *head, *tail; + lock_kernel(); head = create_buffers(page_address(page), blocksize, 1); + unlock_kernel(); if (page->buffers) BUG(); @@ -1341,6 +1367,10 @@ static inline void create_empty_buffers (struct page *page, get_page(page); } +/* + * block_write_full_page() is SMP-safe - currently it's still + * being called with the kernel lock held, but the code is ready. 
+ */ int block_write_full_page (struct file *file, struct page *page, fs_getblock_t fs_get_block) { struct dentry *dentry = file->f_dentry; @@ -1381,12 +1411,13 @@ int block_write_full_page (struct file *file, struct page *page, fs_getblock_t f bh->b_state = (1<b_end_io = end_buffer_io_sync; set_bit(BH_Uptodate, &bh->b_state); } - mark_buffer_dirty(bh, 0); + atomic_mark_buffer_dirty(bh,0); bh = bh->b_this_page; block++; @@ -1399,12 +1430,12 @@ out: return err; } -int block_write_one_page (struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf, fs_getblock_t fs_get_block) +int block_write_partial_page (struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf, fs_getblock_t fs_get_block) { struct dentry *dentry = file->f_dentry; struct inode *inode = dentry->d_inode; unsigned long block; - int err, created; + int err, created, partial; unsigned long blocksize, start_block, end_block; unsigned long start_offset, start_bytes, end_bytes; unsigned long bbits, phys, blocks, i, len; @@ -1412,7 +1443,6 @@ int block_write_one_page (struct file *file, struct page *page, unsigned long of char * target_buf; target_buf = (char *)page_address(page) + offset; - lock_kernel(); if (!PageLocked(page)) BUG(); @@ -1449,40 +1479,16 @@ int block_write_one_page (struct file *file, struct page *page, unsigned long of i = 0; bh = head; + partial = 0; do { if (!bh) BUG(); if ((i < start_block) || (i > end_block)) { + if (!buffer_uptodate(bh)) + partial = 1; goto skip; } - unlock_kernel(); - - err = -EFAULT; - if (start_offset) { - len = start_bytes; - start_offset = 0; - } else - if (end_bytes && (i == end_block)) { - len = end_bytes; - end_bytes = 0; - } else { - /* - * Overwritten block. 
- */ - len = blocksize; - } - if (copy_from_user(target_buf, buf, len)) - goto out_nolock; - target_buf += len; - buf += len; - - /* - * we dirty buffers only after copying the data into - * the page - this way we can dirty the buffer even if - * the bh is still doing IO. - */ - lock_kernel(); if (!bh->b_blocknr) { err = -EIO; down(&inode->i_sem); @@ -1496,12 +1502,16 @@ int block_write_one_page (struct file *file, struct page *page, unsigned long of /* * if partially written block which has contents on * disk, then we have to read it first. + * We also rely on the fact that filesystem holes + * cannot be written. */ if (!created && (start_offset || (end_bytes && (i == end_block)))) { bh->b_state = 0; ll_rw_block(READ, 1, &bh); + lock_kernel(); wait_on_buffer(bh); + unlock_kernel(); err = -EIO; if (!buffer_uptodate(bh)) goto out; @@ -1515,19 +1525,48 @@ int block_write_one_page (struct file *file, struct page *page, unsigned long of bh->b_end_io = end_buffer_io_sync; set_bit(BH_Uptodate, &bh->b_state); } - mark_buffer_dirty(bh, 0); + + err = -EFAULT; + if (start_offset) { + len = start_bytes; + start_offset = 0; + } else + if (end_bytes && (i == end_block)) { + len = end_bytes; + end_bytes = 0; + } else { + /* + * Overwritten block. + */ + len = blocksize; + } + if (copy_from_user(target_buf, buf, len)) + goto out; + target_buf += len; + buf += len; + + /* + * we dirty buffers only after copying the data into + * the page - this way we can dirty the buffer even if + * the bh is still doing IO. + */ + atomic_mark_buffer_dirty(bh,0); skip: i++; block++; bh = bh->b_this_page; } while (bh != head); - unlock_kernel(); - SetPageUptodate(page); + /* + * is this a partial write that happened to make all buffers + * uptodate then we can optimize away a bogus readpage() for + * the next read(). Here we 'discover' wether the page went + * uptodate as a result of this (potentially partial) write. 
+ */ + if (!partial) + SetPageUptodate(page); return bytes; out: - unlock_kernel(); -out_nolock: ClearPageUptodate(page); return err; } @@ -1537,11 +1576,14 @@ out_nolock: * This function expects the page to be locked and may return * before I/O is complete. You then have to check page->locked, * page->uptodate, and maybe wait on page->wait. + * + * brw_page() is SMP-safe, although it's being called with the + * kernel lock held - but the code is ready. */ int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size, int bmap) { struct buffer_head *head, *bh, *arr[MAX_BUF_PER_PAGE]; - int nr, fresh, block; + int nr, fresh /* temporary debugging flag */, block; if (!PageLocked(page)) panic("brw_page: page not locked for I/O"); @@ -1590,7 +1632,7 @@ int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size, int bmap) BUG(); } set_bit(BH_Uptodate, &bh->b_state); - mark_buffer_dirty(bh, 0); + atomic_mark_buffer_dirty(bh, 0); arr[nr++] = bh; } bh = bh->b_this_page; @@ -1600,20 +1642,15 @@ int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size, int bmap) if ((rw == READ) && nr) { if (Page_Uptodate(page)) BUG(); - unlock_kernel(); ll_rw_block(rw, nr, arr); - lock_kernel(); } else { if (!nr && rw == READ) { SetPageUptodate(page); page->owner = (int)current; UnlockPage(page); } - if (nr && (rw == WRITE)) { - unlock_kernel(); + if (nr && (rw == WRITE)) ll_rw_block(rw, nr, arr); - lock_kernel(); - } } return 0; } @@ -1648,33 +1685,70 @@ void mark_buffer_uptodate(struct buffer_head * bh, int on) * mark_buffer_uptodate() functions propagate buffer state into the * page struct once IO has completed. 
*/ -int generic_readpage(struct file * file, struct page * page) +int block_read_full_page(struct file * file, struct page * page) { struct dentry *dentry = file->f_dentry; struct inode *inode = dentry->d_inode; - unsigned long block; - int *p, nr[PAGE_SIZE/512]; - int i; + unsigned long iblock, phys_block; + struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; + unsigned int blocksize, blocks; + int nr; - if (page->buffers) { - printk("hm, no brw_page(%p) because IO already started.\n", - page); - goto out; - } + if (!PageLocked(page)) + PAGE_BUG(page); + blocksize = inode->i_sb->s_blocksize; + if (!page->buffers) + create_empty_buffers(page, inode, blocksize); + head = page->buffers; - i = PAGE_SIZE >> inode->i_sb->s_blocksize_bits; - block = page->offset >> inode->i_sb->s_blocksize_bits; - p = nr; + blocks = PAGE_SIZE >> inode->i_sb->s_blocksize_bits; + iblock = page->offset >> inode->i_sb->s_blocksize_bits; + page->owner = -1; + head = page->buffers; + bh = head; + nr = 0; do { - *p = inode->i_op->bmap(inode, block); - i--; - block++; - p++; - } while (i > 0); + phys_block = bh->b_blocknr; + /* + * important, we have to retry buffers that already have + * their bnr cached but had an IO error! + */ + if (!buffer_uptodate(bh)) { + phys_block = inode->i_op->bmap(inode, iblock); + /* + * this is safe to do because we hold the page lock: + */ + if (phys_block) { + init_buffer(bh, inode->i_dev, phys_block, + end_buffer_io_async, NULL); + arr[nr] = bh; + nr++; + } else { + /* + * filesystem 'hole' represents zero-contents: + */ + memset(bh->b_data, 0, blocksize); + set_bit(BH_Uptodate, &bh->b_state); + } + } + iblock++; + bh = bh->b_this_page; + } while (bh != head); - /* IO start */ - brw_page(READ, page, inode->i_dev, nr, inode->i_sb->s_blocksize, 1); -out: + ++current->maj_flt; + if (nr) { + if (Page_Uptodate(page)) + BUG(); + ll_rw_block(READ, nr, arr); + } else { + /* + * all buffers are uptodate - we can set the page + * uptodate as well. 
+ */ + SetPageUptodate(page); + page->owner = (int)current; + UnlockPage(page); + } return 0; } @@ -1753,8 +1827,6 @@ int try_to_free_buffers(struct page * page) tmp = tmp->b_this_page; if (!buffer_busy(p)) continue; - - wakeup_bdflush(0); return 0; } while (tmp != bh); @@ -2151,9 +2223,14 @@ int bdflush(void * unused) run_task_queue(&tq_disk); wake_up(&bdflush_done); - /* If there are still a lot of dirty buffers around, skip the sleep - and flush some more */ - if(ndirty == 0 || nr_buffers_type[BUF_DIRTY] <= nr_buffers * bdf_prm.b_un.nfract/100) { + /* + * If there are still a lot of dirty buffers around, + * skip the sleep and flush some more + */ + if ((ndirty == 0) || (nr_buffers_type[BUF_DIRTY] <= + nr_buffers * bdf_prm.b_un.nfract/100)) { + + atomic_set(&too_many_dirty_buffers, 0); spin_lock_irq(¤t->sigmask_lock); flush_signals(current); spin_unlock_irq(¤t->sigmask_lock); diff --git a/fs/devices.c b/fs/devices.c index 8d9200f8794f..934fe290fd4a 100644 --- a/fs/devices.c +++ b/fs/devices.c @@ -277,11 +277,14 @@ struct inode_operations blkdev_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; /* diff --git a/fs/devpts/root.c b/fs/devpts/root.c index c284f1d97bfb..c1c1a600005b 100644 --- a/fs/devpts/root.c +++ b/fs/devpts/root.c @@ -57,7 +57,6 @@ struct inode_operations devpts_root_inode_operations = { NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL /* revalidate */ }; diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index a6753d27682f..59f068b5e759 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -67,12 +67,14 @@ struct inode_operations ext2_dir_inode_operations = { ext2_rename, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ 
NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ ext2_permission, /* permission */ - NULL /* smap */ + NULL, /* smap */ + NULL /* revalidate */ }; int ext2_check_dir_entry (const char * function, struct inode * dir, diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 806859ba0be2..c90419ce3ef7 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -113,7 +113,7 @@ static int ext2_writepage (struct file * file, struct page * page) static long ext2_write_one_page (struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf) { - return block_write_one_page(file, page, offset, bytes, buf, ext2_getblk_block); + return block_write_partial_page(file, page, offset, bytes, buf, ext2_getblk_block); } /* @@ -122,7 +122,14 @@ static long ext2_write_one_page (struct file *file, struct page *page, unsigned static ssize_t ext2_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) { - return generic_file_write(file, buf, count, ppos, ext2_write_one_page); + ssize_t retval = generic_file_write(file, buf, count, ppos, ext2_write_one_page); + if (retval > 0) { + struct inode *inode = file->f_dentry->d_inode; + remove_suid(inode); + inode->i_ctime = inode->i_mtime = CURRENT_TIME; + mark_inode_dirty(inode); + } + return retval; } /* @@ -188,13 +195,12 @@ struct inode_operations ext2_file_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ - generic_readpage, /* readpage */ - ext2_writepage, /* writepage */ ext2_bmap, /* bmap */ + block_read_full_page, /* readpage */ + ext2_writepage, /* writepage */ + block_flushpage, /* flushpage */ ext2_truncate, /* truncate */ ext2_permission, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL, /* revalidate */ - generic_block_flushpage,/* flushpage */ }; diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 1258a39b63e6..02fb5b7b7eb5 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -31,6 +31,7 @@ #include 
#include #include +#include static int ext2_update_inode(struct inode * inode, int do_sync); @@ -131,58 +132,66 @@ static int ext2_alloc_block (struct inode * inode, unsigned long goal, int *err) int ext2_bmap (struct inode * inode, int block) { - int i; + int i, ret; int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); int addr_per_block_bits = EXT2_ADDR_PER_BLOCK_BITS(inode->i_sb); + ret = 0; + lock_kernel(); if (block < 0) { ext2_warning (inode->i_sb, "ext2_bmap", "block < 0"); - return 0; + goto out; } if (block >= EXT2_NDIR_BLOCKS + addr_per_block + (1 << (addr_per_block_bits * 2)) + ((1 << (addr_per_block_bits * 2)) << addr_per_block_bits)) { ext2_warning (inode->i_sb, "ext2_bmap", "block > big"); - return 0; + goto out; + } + if (block < EXT2_NDIR_BLOCKS) { + ret = inode_bmap (inode, block); + goto out; } - if (block < EXT2_NDIR_BLOCKS) - return inode_bmap (inode, block); block -= EXT2_NDIR_BLOCKS; if (block < addr_per_block) { i = inode_bmap (inode, EXT2_IND_BLOCK); if (!i) - return 0; - return block_bmap (bread (inode->i_dev, i, + goto out; + ret = block_bmap (bread (inode->i_dev, i, inode->i_sb->s_blocksize), block); + goto out; } block -= addr_per_block; if (block < (1 << (addr_per_block_bits * 2))) { i = inode_bmap (inode, EXT2_DIND_BLOCK); if (!i) - return 0; + goto out; i = block_bmap (bread (inode->i_dev, i, inode->i_sb->s_blocksize), block >> addr_per_block_bits); if (!i) - return 0; - return block_bmap (bread (inode->i_dev, i, + goto out; + ret = block_bmap (bread (inode->i_dev, i, inode->i_sb->s_blocksize), - block & (addr_per_block - 1)); + block & (addr_per_block - 1)); } block -= (1 << (addr_per_block_bits * 2)); i = inode_bmap (inode, EXT2_TIND_BLOCK); if (!i) - return 0; + goto out; i = block_bmap (bread (inode->i_dev, i, inode->i_sb->s_blocksize), block >> (addr_per_block_bits * 2)); if (!i) - return 0; + goto out; i = block_bmap (bread (inode->i_dev, i, inode->i_sb->s_blocksize), (block >> addr_per_block_bits) & (addr_per_block - 1)); if 
(!i) - return 0; - return block_bmap (bread (inode->i_dev, i, inode->i_sb->s_blocksize), + goto out; + ret = block_bmap (bread (inode->i_dev, i, inode->i_sb->s_blocksize), block & (addr_per_block - 1)); +out: + unlock_kernel(); + return ret; } int ext2_bmap_create (struct inode * inode, int block) @@ -461,18 +470,20 @@ int ext2_getblk_block (struct inode * inode, long block, unsigned long b; unsigned long addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); int addr_per_block_bits = EXT2_ADDR_PER_BLOCK_BITS(inode->i_sb); - int phys_block; + int phys_block, ret; + lock_kernel(); + ret = 0; *err = -EIO; if (block < 0) { ext2_warning (inode->i_sb, "ext2_getblk", "block < 0"); - return 0; + goto abort; } if (block > EXT2_NDIR_BLOCKS + addr_per_block + (1 << (addr_per_block_bits * 2)) + ((1 << (addr_per_block_bits * 2)) << addr_per_block_bits)) { ext2_warning (inode->i_sb, "ext2_getblk", "block > big"); - return 0; + goto abort; } /* * If this is a sequential block allocation, set the next_alloc_block @@ -527,13 +538,14 @@ int ext2_getblk_block (struct inode * inode, long block, inode->i_sb->s_blocksize, b, err, 0, &phys_block, created); out: - if (!phys_block) { - return 0; - } - if (*err) { - return 0; - } - return phys_block; + if (!phys_block) + goto abort; + if (*err) + goto abort; + ret = phys_block; +abort: + unlock_kernel(); + return ret; } struct buffer_head * ext2_getblk (struct inode * inode, long block, diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c index 826cb41768e0..b0ebcb91bd48 100644 --- a/fs/ext2/symlink.c +++ b/fs/ext2/symlink.c @@ -43,12 +43,14 @@ struct inode_operations ext2_symlink_inode_operations = { NULL, /* rename */ ext2_readlink, /* readlink */ ext2_follow_link, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL /* smap */ + NULL, /* smap */ + NULL /* revalidate */ }; static struct dentry * ext2_follow_link(struct 
dentry * dentry, diff --git a/fs/fifo.c b/fs/fifo.c index ecb27722efbc..e18183fc93f2 100644 --- a/fs/fifo.c +++ b/fs/fifo.c @@ -179,11 +179,14 @@ struct inode_operations fifo_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; diff --git a/fs/hfs/dir_nat.c b/fs/hfs/dir_nat.c index be6974b6663f..21d4ca9afdee 100644 --- a/fs/hfs/dir_nat.c +++ b/fs/hfs/dir_nat.c @@ -99,7 +99,6 @@ struct inode_operations hfs_nat_ndir_inode_operations = { NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL /* revalidate */ }; @@ -122,7 +121,6 @@ struct inode_operations hfs_nat_hdir_inode_operations = { NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL /* revalidate */ }; diff --git a/fs/hfs/file.c b/fs/hfs/file.c index 00bebd017d9e..d3796e275a73 100644 --- a/fs/hfs/file.c +++ b/fs/hfs/file.c @@ -69,7 +69,6 @@ struct inode_operations hfs_file_inode_operations = { hfs_file_truncate, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL /* revalidate */ }; diff --git a/fs/hfs/file_cap.c b/fs/hfs/file_cap.c index b3a58912c708..789073d196d7 100644 --- a/fs/hfs/file_cap.c +++ b/fs/hfs/file_cap.c @@ -83,7 +83,6 @@ struct inode_operations hfs_cap_info_inode_operations = { cap_info_truncate, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL /* revalidata */ }; diff --git a/fs/hfs/file_hdr.c b/fs/hfs/file_hdr.c index c1e1534b04db..d112b34981b0 100644 --- a/fs/hfs/file_hdr.c +++ b/fs/hfs/file_hdr.c @@ -85,7 +85,6 @@ struct inode_operations hfs_hdr_inode_operations = { hdr_truncate, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL /* revalidate */ }; diff --git 
a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 99bfa1004cfd..17984d667e3d 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -48,7 +48,6 @@ static const struct inode_operations hpfs_file_iops = &hpfs_truncate, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL, /* revalidate */ }; @@ -91,7 +90,6 @@ static const struct inode_operations hpfs_dir_iops = NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL, /* revalidate */ }; @@ -115,7 +113,6 @@ const struct inode_operations hpfs_symlink_iops = NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL, /* revalidate */ }; diff --git a/fs/isofs/file.c b/fs/isofs/file.c index e2b4405d92eb..ce85b367a765 100644 --- a/fs/isofs/file.c +++ b/fs/isofs/file.c @@ -48,9 +48,10 @@ struct inode_operations isofs_file_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ - generic_readpage, /* readpage */ - NULL, /* writepage */ isofs_bmap, /* bmap */ + block_read_full_page, /* readpage */ + NULL, /* writepage */ + NULL, /* flushpage */ NULL, /* truncate */ NULL /* permission */ }; diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 1d88aaea8772..01d37a849be2 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -909,7 +910,7 @@ int isofs_statfs (struct super_block *sb, struct statfs *buf, int bufsiz) return copy_to_user(buf, &tmp, bufsiz) ? 
-EFAULT : 0; } -int isofs_bmap(struct inode * inode,int block) +static int do_isofs_bmap(struct inode * inode,int block) { off_t b_off, offset, size; struct inode *ino; @@ -991,6 +992,15 @@ int isofs_bmap(struct inode * inode,int block) return (b_off - offset + firstext) >> ISOFS_BUFFER_BITS(inode); } +int isofs_bmap(struct inode * inode,int block) +{ + int retval; + + lock_kernel(); + retval = do_isofs_bmap(inode, block); + unlock_kernel(); + return retval; +} static void test_and_set_uid(uid_t *p, uid_t value) { diff --git a/fs/minix/file.c b/fs/minix/file.c index 55ed5fd5d93b..d6b7ecb17d9e 100644 --- a/fs/minix/file.c +++ b/fs/minix/file.c @@ -110,7 +110,6 @@ struct inode_operations minix_file_inode_operations = { minix_truncate, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL, /* revalidate */ - generic_block_flushpage,/* flushpage */ + block_flushpage, /* flushpage */ }; diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c index be1c34dac8bb..8175724d7b38 100644 --- a/fs/msdos/namei.c +++ b/fs/msdos/namei.c @@ -633,7 +633,6 @@ struct inode_operations msdos_dir_inode_operations = { NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL, /* revalidate */ }; diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 0aa50559b449..680f011a149e 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -98,7 +98,6 @@ struct inode_operations ncp_dir_inode_operations = NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL, /* revalidate */ }; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 6f600dd5d7eb..d41505862945 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -78,13 +78,13 @@ struct inode_operations nfs_dir_inode_operations = { nfs_rename, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* 
updatepage */ nfs_revalidate, /* revalidate */ }; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index f10ce96edaa9..d3066f4cd6f9 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -71,15 +71,14 @@ struct inode_operations nfs_file_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ nfs_readpage, /* readpage */ nfs_writepage, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ nfs_revalidate, /* revalidate */ - NULL, /* flushpage */ }; /* Hack for future NFS swap support */ diff --git a/fs/nfs/read.c b/fs/nfs/read.c index c0a7adaee9c9..843f6b23eae8 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -222,6 +223,7 @@ nfs_readpage(struct file *file, struct page *page) struct inode *inode = dentry->d_inode; int error; + lock_kernel(); dprintk("NFS: nfs_readpage (%p %ld@%ld)\n", page, PAGE_SIZE, page->offset); get_page(page); @@ -254,5 +256,6 @@ out_error: out_free: free_page(page_address(page)); out: + unlock_kernel(); return error; } diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index b2ac2f6f8d0e..c6fc4d685229 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -43,11 +43,14 @@ struct inode_operations nfs_symlink_inode_operations = { NULL, /* rename */ nfs_readlink, /* readlink */ nfs_follow_link, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; /* Symlink caching in the page cache is even more simplistic diff --git a/fs/nfs/write.c b/fs/nfs/write.c index de5ab535db58..911a5261e738 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -55,6 +55,7 @@ #include #include #include +#include #define NFS_PARANOIA 1 #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -93,6 +94,7 @@ 
nfs_writepage_sync(struct dentry *dentry, struct inode *inode, u8 *buffer; struct nfs_fattr fattr; + lock_kernel(); dprintk("NFS: nfs_writepage_sync(%s/%s %d@%ld)\n", dentry->d_parent->d_name.name, dentry->d_name.name, count, page->offset + offset); @@ -153,6 +155,7 @@ io_error: inode->i_ino, fattr.fileid); } + unlock_kernel(); return written? written : result; } diff --git a/fs/ntfs/fs.c b/fs/ntfs/fs.c index a43e071fef48..d9430c2cca36 100644 --- a/fs/ntfs/fs.c +++ b/fs/ntfs/fs.c @@ -445,7 +445,6 @@ static struct inode_operations ntfs_inode_operations_nobmap = { NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL, /* revalidate */ }; @@ -628,7 +627,6 @@ static struct inode_operations ntfs_inode_operations = { NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL, /* revalidate */ }; @@ -677,7 +675,6 @@ static struct inode_operations ntfs_dir_inode_operations = { NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL, /* revalidate */ }; diff --git a/fs/pipe.c b/fs/pipe.c index dd4f6cd19665..9830418cc285 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -461,11 +461,14 @@ struct inode_operations pipe_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; int do_pipe(int *fd) diff --git a/fs/proc/array.c b/fs/proc/array.c index d2ec8eae670b..7f4aca723ba8 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -1519,11 +1519,14 @@ struct inode_operations proc_array_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, 
/* smap */ + NULL /* revalidate */ }; static ssize_t arraylong_read(struct file * file, char * buf, @@ -1567,9 +1570,12 @@ struct inode_operations proc_arraylong_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; diff --git a/fs/proc/base.c b/fs/proc/base.c index c9b2d8649bf4..8579dd8c537e 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -45,11 +45,14 @@ static struct inode_operations proc_base_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; /* diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 1defdbae1195..2bbb51d28366 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -51,11 +51,14 @@ struct inode_operations proc_fd_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - proc_permission /* permission */ + proc_permission, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; /* diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 54b16f84bdb9..4e59fed731ed 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -51,20 +51,23 @@ struct inode_operations proc_file_inode_operations = { &proc_file_operations, /* default proc file-ops */ NULL, /* create */ NULL, /* lookup */ - NULL, /* link */ - NULL, /* unlink */ - NULL, /* symlink */ - NULL, /* mkdir */ - NULL, /* rmdir */ - NULL, /* mknod */ - NULL, /* rename */ - NULL, /* readlink */ - NULL, /* follow_link */ - NULL, /* readpage */ - NULL, /* 
writepage */ - NULL, /* bmap */ - NULL, /* truncate */ - NULL /* permission */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* bmap */ + NULL, /* readpage */ + NULL, /* writepage */ + NULL, /* flushpage */ + NULL, /* truncate */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; /* @@ -83,11 +86,14 @@ struct inode_operations proc_net_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c index ba78768b62ce..3cfccab963f4 100644 --- a/fs/proc/kmsg.c +++ b/fs/proc/kmsg.c @@ -72,9 +72,12 @@ struct inode_operations proc_kmsg_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; diff --git a/fs/proc/link.c b/fs/proc/link.c index 9df4de67448c..3a563982512a 100644 --- a/fs/proc/link.c +++ b/fs/proc/link.c @@ -49,11 +49,14 @@ struct inode_operations proc_link_inode_operations = { NULL, /* rename */ proc_readlink, /* readlink */ proc_follow_link, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - proc_permission /* permission */ + proc_permission, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; static struct dentry * proc_follow_link(struct dentry *dentry, diff --git a/fs/proc/mem.c b/fs/proc/mem.c index df04473833fe..0e89f7645ce5 100644 --- a/fs/proc/mem.c 
+++ b/fs/proc/mem.c @@ -336,9 +336,12 @@ struct inode_operations proc_mem_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - proc_permission /* permission */ + proc_permission, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; diff --git a/fs/proc/net.c b/fs/proc/net.c index a6d8c5616547..1ad226de0bb6 100644 --- a/fs/proc/net.c +++ b/fs/proc/net.c @@ -113,9 +113,12 @@ struct inode_operations proc_net_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; diff --git a/fs/proc/omirr.c b/fs/proc/omirr.c index dbf2b32b979e..562aa11c5b77 100644 --- a/fs/proc/omirr.c +++ b/fs/proc/omirr.c @@ -277,22 +277,24 @@ static struct file_operations omirr_operations = { }; struct inode_operations proc_omirr_inode_operations = { - &omirr_operations, - NULL, /* create */ - NULL, /* lookup */ - NULL, /* link */ - NULL, /* unlink */ - NULL, /* symlink */ - NULL, /* mkdir */ - NULL, /* rmdir */ - NULL, /* mknod */ - NULL, /* rename */ - NULL, /* readlink */ - NULL, /* follow_link */ - NULL, /* readpage */ - NULL, /* writepage */ - NULL, /* bmap */ - NULL, /* truncate */ - NULL, /* permission */ - NULL /* smap */ + &omirr_operations, + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* bmap */ + NULL, /* readpage */ + NULL, /* writepage */ + NULL, /* flushpage */ + NULL, /* truncate */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; diff --git 
a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c index cd4aca324eca..594f008586d1 100644 --- a/fs/proc/proc_devtree.c +++ b/fs/proc/proc_devtree.c @@ -57,12 +57,14 @@ struct inode_operations devtree_symlink_inode_operations = { NULL, /* rename */ devtree_readlink, /* readlink */ devtree_follow_link, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL /* smap */ + NULL, /* smap */ + NULL /* revalidate */ }; static struct dentry *devtree_follow_link(struct dentry *dentry, diff --git a/fs/proc/root.c b/fs/proc/root.c index 79622b022e2c..62f016221c63 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -71,11 +71,14 @@ struct inode_operations proc_dir_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; /* @@ -94,11 +97,14 @@ struct inode_operations proc_dyna_dir_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; /* @@ -136,11 +142,14 @@ static struct inode_operations proc_root_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; /* @@ -293,11 +302,14 @@ struct inode_operations proc_openprom_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* 
readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; struct proc_dir_entry proc_openprom = { @@ -478,11 +490,14 @@ static struct inode_operations proc_self_inode_operations = { NULL, /* rename */ proc_self_readlink, /* readlink */ proc_self_follow_link, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; static struct inode_operations proc_link_inode_operations = { @@ -498,11 +513,14 @@ static struct inode_operations proc_link_inode_operations = { NULL, /* rename */ proc_readlink, /* readlink */ proc_follow_link, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; static struct proc_dir_entry proc_root_loadavg = { diff --git a/fs/proc/scsi.c b/fs/proc/scsi.c index 6f3ad077070d..ae2679b6d1d7 100644 --- a/fs/proc/scsi.c +++ b/fs/proc/scsi.c @@ -59,23 +59,26 @@ static struct file_operations proc_scsi_operations = { * proc directories can do almost nothing.. 
*/ struct inode_operations proc_scsi_inode_operations = { - &proc_scsi_operations, /* default scsi directory file-ops */ - NULL, /* create */ - proc_lookup, /* lookup */ - NULL, /* link */ - NULL, /* unlink */ - NULL, /* symlink */ - NULL, /* mkdir */ - NULL, /* rmdir */ - NULL, /* mknod */ - NULL, /* rename */ - NULL, /* readlink */ - NULL, /* follow_link */ - NULL, /* readpage */ - NULL, /* writepage */ - NULL, /* bmap */ - NULL, /* truncate */ - NULL /* permission */ +&proc_scsi_operations, /* default scsi directory file-ops */ + NULL, /* create */ + proc_lookup, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* bmap */ + NULL, /* readpage */ + NULL, /* writepage */ + NULL, /* flushpage */ + NULL, /* truncate */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; int get_not_present_info(char *buffer, char **start, off_t offset, int length) diff --git a/fs/proc/sysvipc.c b/fs/proc/sysvipc.c index eab3e3186e03..c6e32894d19d 100644 --- a/fs/proc/sysvipc.c +++ b/fs/proc/sysvipc.c @@ -118,21 +118,24 @@ static struct file_operations proc_sysvipc_operations = { * proc directories can do almost nothing.. 
*/ struct inode_operations proc_sysvipc_inode_operations = { - &proc_sysvipc_operations, /* default net file-ops */ - NULL, /* create */ - NULL, /* lookup */ - NULL, /* link */ - NULL, /* unlink */ - NULL, /* symlink */ - NULL, /* mkdir */ - NULL, /* rmdir */ - NULL, /* mknod */ - NULL, /* rename */ - NULL, /* readlink */ - NULL, /* follow_link */ - NULL, /* readpage */ - NULL, /* writepage */ - NULL, /* bmap */ - NULL, /* truncate */ - NULL /* permission */ + &proc_sysvipc_operations, /* default net file-ops */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* bmap */ + NULL, /* readpage */ + NULL, /* writepage */ + NULL, /* flushpage */ + NULL, /* truncate */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c index b820642fec3c..870ebac74cf8 100644 --- a/fs/smbfs/dir.c +++ b/fs/smbfs/dir.c @@ -65,7 +65,6 @@ struct inode_operations smb_dir_inode_operations = NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ smb_revalidate_inode, /* revalidate */ }; diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index 1a4a0add1ba0..cfb2d82da9bd 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -410,6 +410,5 @@ struct inode_operations smb_file_inode_operations = NULL, /* truncate */ smb_file_permission, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ smb_revalidate_inode, /* revalidate */ }; diff --git a/fs/sysv/file.c b/fs/sysv/file.c index 9e806e4d1375..19443f2890c4 100644 --- a/fs/sysv/file.c +++ b/fs/sysv/file.c @@ -119,7 +119,6 @@ struct inode_operations sysv_file_inode_operations = { sysv_truncate, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL, /* revalidate */ - generic_block_flushpage,/* flushpage */ + block_flushpage, /* flushpage 
*/ }; diff --git a/fs/ufs/file.c b/fs/ufs/file.c index 57db16baefa0..9e027cfc3734 100644 --- a/fs/ufs/file.c +++ b/fs/ufs/file.c @@ -182,7 +182,6 @@ struct inode_operations ufs_file_inode_operations = { ufs_truncate, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL, /* revalidate */ - generic_block_flushpage,/* flushpage */ + block_flushpage, /* flushpage */ }; diff --git a/fs/umsdos/dir.c b/fs/umsdos/dir.c index a780a95871a8..0f26103c47fc 100644 --- a/fs/umsdos/dir.c +++ b/fs/umsdos/dir.c @@ -838,6 +838,5 @@ struct inode_operations umsdos_dir_inode_operations = NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL, /* revalidate */ }; diff --git a/fs/umsdos/rdir.c b/fs/umsdos/rdir.c index 3f5d109533a0..7951bb8f8274 100644 --- a/fs/umsdos/rdir.c +++ b/fs/umsdos/rdir.c @@ -253,6 +253,5 @@ struct inode_operations umsdos_rdir_inode_operations = NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL, /* revalidate */ }; diff --git a/fs/umsdos/symlink.c b/fs/umsdos/symlink.c index 4b3678a22a2c..97ea2da411f7 100644 --- a/fs/umsdos/symlink.c +++ b/fs/umsdos/symlink.c @@ -141,7 +141,6 @@ struct inode_operations umsdos_symlink_inode_operations = NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL /* revalidate */ }; diff --git a/include/linux/fs.h b/include/linux/fs.h index a613816aad1b..fd67e059ac86 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -596,15 +596,23 @@ struct inode_operations { struct inode *, struct dentry *); int (*readlink) (struct dentry *, char *,int); struct dentry * (*follow_link) (struct dentry *, struct dentry *, unsigned int); + /* + * the order of these functions within the VFS template has been + * changed because SMP locking has changed: from now on all bmap, + * readpage, writepage and flushpage functions are supposed to do + * whatever locking they need to get proper SMP operation - for + 
* now in most cases this means a lock/unlock_kernel at entry/exit. + * [The new order is also slightly more logical :)] + */ + int (*bmap) (struct inode *,int); int (*readpage) (struct file *, struct page *); int (*writepage) (struct file *, struct page *); - int (*bmap) (struct inode *,int); + int (*flushpage) (struct inode *, struct page *, unsigned long); + void (*truncate) (struct inode *); int (*permission) (struct inode *, int); int (*smap) (struct inode *,int); - int (*updatepage) (struct file *, struct page *, unsigned long, unsigned int); int (*revalidate) (struct dentry *); - int (*flushpage) (struct inode *, struct page *, unsigned long); }; struct super_operations { @@ -745,21 +753,36 @@ void mark_buffer_uptodate(struct buffer_head *, int); extern inline void mark_buffer_clean(struct buffer_head * bh) { - if (test_and_clear_bit(BH_Dirty, &bh->b_state)) { - if (bh->b_list == BUF_DIRTY) - refile_buffer(bh); - } + if (test_and_clear_bit(BH_Dirty, &bh->b_state)) + refile_buffer(bh); } +extern void FASTCALL(__mark_buffer_dirty(struct buffer_head *bh, int flag)); +extern void FASTCALL(__atomic_mark_buffer_dirty(struct buffer_head *bh, int flag)); + +#define atomic_set_buffer_dirty(bh) test_and_set_bit(BH_Dirty, &(bh)->b_state) + extern inline void mark_buffer_dirty(struct buffer_head * bh, int flag) { - if (!test_and_set_bit(BH_Dirty, &bh->b_state)) { - set_writetime(bh, flag); - if (bh->b_list != BUF_DIRTY) - refile_buffer(bh); - } + if (!atomic_set_buffer_dirty(bh)) + __mark_buffer_dirty(bh, flag); +} + +/* + * SMP-safe version of the above - does synchronization with + * other users of buffer-cache data structures. + * + * since we test-set the dirty bit in a CPU-atomic way we also + * have optimized the common 'redirtying' case away completely. 
+ */ +extern inline void atomic_mark_buffer_dirty(struct buffer_head * bh, int flag) +{ + if (!atomic_set_buffer_dirty(bh)) + __atomic_mark_buffer_dirty(bh, flag); } + +extern void balance_dirty(kdev_t); extern int check_disk_change(kdev_t); extern int invalidate_inodes(struct super_block *); extern void invalidate_inode_pages(struct inode *); @@ -850,14 +873,15 @@ extern int brw_page(int, struct page *, kdev_t, int [], int, int); typedef long (*writepage_t)(struct file *, struct page *, unsigned long, unsigned long, const char *); typedef int (*fs_getblock_t)(struct inode *, long, int, int *, int *); +/* Generic buffer handling for block filesystems.. */ +extern int block_read_full_page(struct file *, struct page *); +extern int block_write_full_page (struct file *, struct page *, fs_getblock_t); +extern int block_write_partial_page (struct file *, struct page *, unsigned long, unsigned long, const char *, fs_getblock_t); +extern int block_flushpage(struct inode *, struct page *, unsigned long); -extern int generic_readpage(struct file *, struct page *); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern ssize_t generic_file_read(struct file *, char *, size_t, loff_t *); extern ssize_t generic_file_write(struct file *, const char *, size_t, loff_t *, writepage_t); -extern int generic_block_flushpage(struct inode *, struct page *, unsigned long); -extern int block_write_one_page (struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf, fs_getblock_t fs_get_block); -extern int block_write_full_page (struct file *file, struct page *page, fs_getblock_t fs_get_block); extern struct super_block *get_super(kdev_t); diff --git a/include/linux/swap.h b/include/linux/swap.h index f0ba314054cb..c06ddba6305e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -107,6 +107,7 @@ extern int FASTCALL(swap_count(unsigned long)); /* * Make these inline later once they are working properly. 
*/ +extern void __delete_from_swap_cache(struct page *page); extern void delete_from_swap_cache(struct page *page); extern void free_page_and_swap_cache(unsigned long addr); diff --git a/kernel/ksyms.c b/kernel/ksyms.c index 03c5167100ee..a57d67d8ba7b 100644 --- a/kernel/ksyms.c +++ b/kernel/ksyms.c @@ -167,7 +167,6 @@ EXPORT_SYMBOL(add_blkdev_randomness); EXPORT_SYMBOL(generic_file_read); EXPORT_SYMBOL(generic_file_write); EXPORT_SYMBOL(generic_file_mmap); -EXPORT_SYMBOL(generic_readpage); EXPORT_SYMBOL(file_lock_table); EXPORT_SYMBOL(posix_lock_file); EXPORT_SYMBOL(posix_test_lock); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ed98241366b9..2d4fb7169d22 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -121,11 +121,14 @@ struct inode_operations proc_sys_inode_operations = NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - proc_sys_permission + proc_sys_permission, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; extern struct proc_dir_entry proc_sys_root; diff --git a/mm/filemap.c b/mm/filemap.c index c0a17a00f595..4e885758f624 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -228,12 +228,14 @@ repeat: spin_unlock(&pagecache_lock); } +extern atomic_t too_many_dirty_buffers; + int shrink_mmap(int priority, int gfp_mask) { static unsigned long clock = 0; unsigned long limit = num_physpages; struct page * page; - int count, err; + int count, users; count = limit >> priority; @@ -262,24 +264,64 @@ int shrink_mmap(int priority, int gfp_mask) if ((gfp_mask & __GFP_DMA) && !PageDMA(page)) continue; - if (PageLocked(page)) + /* + * Some common cases that we just short-circuit without + * getting the locks - we need to re-check this once we + * have the lock, but that's fine. 
+ */ + users = page_count(page); + if (!users) continue; + if (!page->buffers) { + if (!page->inode) + continue; + if (users > 1) + continue; + } + + /* + * ok, now the page looks interesting. Re-check things + * and keep the lock. + */ + spin_lock(&pagecache_lock); + if (!page->inode && !page->buffers) { + spin_unlock(&pagecache_lock); + continue; + } + if (!page_count(page)) { +// BUG(); + spin_unlock(&pagecache_lock); + continue; + } + get_page(page); + if (TryLockPage(page)) { + spin_unlock(&pagecache_lock); + goto put_continue; + } + + /* + * we keep pagecache_lock locked and unlock it in + * each branch, so that the page->inode case doesnt + * have to re-grab it. Here comes the 'real' logic + * to free memory: + */ /* Is it a buffer page? */ if (page->buffers) { - if (TryLockPage(page)) - continue; - err = try_to_free_buffers(page); - UnlockPage(page); - - if (!err) - continue; - goto out; + kdev_t dev = page->buffers->b_dev; + spin_unlock(&pagecache_lock); + if (try_to_free_buffers(page)) + goto made_progress; + if (!atomic_read(&too_many_dirty_buffers)) { + atomic_set(&too_many_dirty_buffers, 1); + balance_dirty(dev); + } + goto unlock_continue; } /* We can't free pages unless there's just one user */ - if (page_count(page) != 1) - continue; + if (page_count(page) != 2) + goto spin_unlock_continue; count--; @@ -289,44 +331,36 @@ int shrink_mmap(int priority, int gfp_mask) * were to be marked referenced.. */ if (PageSwapCache(page)) { - if (referenced && swap_count(page->offset) != 1) - continue; - delete_from_swap_cache(page); - err = 1; - goto out; + spin_unlock(&pagecache_lock); + if (referenced && swap_count(page->offset) != 2) + goto unlock_continue; + __delete_from_swap_cache(page); + page_cache_release(page); + goto made_progress; } - if (referenced) - continue; - /* is it a page-cache page? 
*/ - spin_lock(&pagecache_lock); - if (page->inode) { - if (pgcache_under_min()) - goto unlock_continue; - if (TryLockPage(page)) - goto unlock_continue; - - if (page_count(page) == 1) { - remove_page_from_inode_queue(page); - remove_page_from_hash_queue(page); - page->inode = NULL; - } + if (!referenced && page->inode && !pgcache_under_min()) { + remove_page_from_inode_queue(page); + remove_page_from_hash_queue(page); + page->inode = NULL; spin_unlock(&pagecache_lock); - UnlockPage(page); page_cache_release(page); - err = 1; - goto out; -unlock_continue: - spin_unlock(&pagecache_lock); - continue; + goto made_progress; } +spin_unlock_continue: spin_unlock(&pagecache_lock); +unlock_continue: + UnlockPage(page); +put_continue: + put_page(page); } while (count > 0); - err = 0; -out: - return err; + return 0; +made_progress: + UnlockPage(page); + put_page(page); + return 1; } static inline struct page * __find_page_nolock(struct inode * inode, unsigned long offset, struct page *page) @@ -499,9 +533,7 @@ static unsigned long try_to_read_ahead(struct file * file, * We do not have to check the return value here * because it's a readahead. */ - lock_kernel(); inode->i_op->readpage(file, page); - unlock_kernel(); page_cache = 0; page_cache_release(page); } @@ -522,15 +554,13 @@ void ___wait_on_page(struct page *page) DECLARE_WAITQUEUE(wait, tsk); add_wait_queue(&page->wait, &wait); - tsk->state = TASK_UNINTERRUPTIBLE; - run_task_queue(&tq_disk); - if (PageLocked(page)) { - do { - tsk->state = TASK_UNINTERRUPTIBLE; - run_task_queue(&tq_disk); - schedule(); - } while (PageLocked(page)); - } + do { + tsk->state = TASK_UNINTERRUPTIBLE; + run_task_queue(&tq_disk); + if (!PageLocked(page)) + break; + schedule(); + } while (PageLocked(page)); tsk->state = TASK_RUNNING; remove_wait_queue(&page->wait, &wait); } @@ -1039,11 +1069,9 @@ page_not_up_to_date: goto page_ok; } -read_page: +readpage: /* ... and start the actual read. The read will unlock the page. 
*/ - lock_kernel(); error = inode->i_op->readpage(filp, page); - unlock_kernel(); if (!error) { if (Page_Uptodate(page)) @@ -1095,7 +1123,7 @@ no_cached_page: spin_unlock(&pagecache_lock); page_cache = 0; - goto read_page; + goto readpage; } *ppos = pos; @@ -1221,6 +1249,7 @@ asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t *offset, size_t cou if (retval) goto fput_out; + unlock_kernel(); retval = 0; if (count) { read_descriptor_t desc; @@ -1230,7 +1259,7 @@ asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t *offset, size_t cou ppos = &in_file->f_pos; if (offset) { if (get_user(pos, offset)) - goto fput_out; + goto fput_out_lock; ppos = &pos; } @@ -1247,7 +1276,8 @@ asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t *offset, size_t cou put_user(pos, offset); } - +fput_out_lock: + lock_kernel(); fput_out: fput(out_file); fput_in: @@ -1283,6 +1313,7 @@ static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long offset = (address & PAGE_MASK) - area->vm_start + area->vm_offset; if (offset >= inode->i_size && (area->vm_flags & VM_SHARED) && area->vm_mm == current->mm) goto no_page_nolock; + unlock_kernel(); /* @@ -1306,10 +1337,12 @@ found_page: goto failure; } - wait_on_page(page); - - if (!Page_Uptodate(page)) - PAGE_BUG(page); + if (!Page_Uptodate(page)) { + lock_page(page); + if (!Page_Uptodate(page)) + goto page_not_uptodate; + UnlockPage(page); + } success: /* @@ -1377,9 +1410,8 @@ no_cached_page: */ new_page = 0; - lock_kernel(); +page_not_uptodate: error = inode->i_op->readpage(file, page); - unlock_kernel(); if (!error) { wait_on_page(page); @@ -1398,9 +1430,7 @@ page_read_error: if (!PageLocked(page)) PAGE_BUG(page); ClearPageError(page); - lock_kernel(); error = inode->i_op->readpage(file, page); - unlock_kernel(); if (error) goto failure; wait_on_page(page); diff --git a/mm/swap_state.c b/mm/swap_state.c index 1b33794fd920..21723c1db536 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -39,15 +39,14 @@ 
static struct inode_operations swapper_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + block_flushpage, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ - NULL, /* revalidate */ - generic_block_flushpage, /* flushpage */ + NULL /* revalidate */ }; struct inode swapper_inode = { i_op: &swapper_inode_operations }; @@ -221,17 +220,14 @@ static inline void remove_from_swap_cache(struct page *page) remove_inode_page(page); } - /* * This must be called only on pages that have * been verified to be in the swap cache. */ -void delete_from_swap_cache(struct page *page) +void __delete_from_swap_cache(struct page *page) { long entry = page->offset; - lock_page(page); - #ifdef SWAP_CACHE_INFO swap_cache_del_total++; #endif @@ -241,9 +237,21 @@ void delete_from_swap_cache(struct page *page) page_address(page), page_count(page), entry); #endif remove_from_swap_cache (page); + swap_free (entry); +} + +/* + * This must be called only on pages that have + * been verified to be in the swap cache. + */ +void delete_from_swap_cache(struct page *page) +{ + lock_page(page); + + __delete_from_swap_cache(page); + UnlockPage(page); page_cache_release(page); - swap_free (entry); } /* @@ -258,9 +266,8 @@ void free_page_and_swap_cache(unsigned long addr) /* * If we are the only user, then free up the swap cache. */ - if (PageSwapCache(page) && !is_page_shared(page)) { + if (PageSwapCache(page) && !is_page_shared(page)) delete_from_swap_cache(page); - } __free_page(page); } -- 2.39.5