This is a general announcement of the imminent code-freeze that will

author Linus Torvalds <torvalds@linuxfoundation.org>

Fri, 23 Nov 2007 20:09:19 +0000 (15:09 -0500)

committer Linus Torvalds <torvalds@linuxfoundation.org>

Fri, 23 Nov 2007 20:09:19 +0000 (15:09 -0500)
author Linus Torvalds <torvalds@linuxfoundation.org>
Fri, 23 Nov 2007 20:09:19 +0000 (15:09 -0500)
committer Linus Torvalds <torvalds@linuxfoundation.org>
Fri, 23 Nov 2007 20:09:19 +0000 (15:09 -0500)
diff --git a/Makefile b/Makefile

index 7028cdfff6526288cf637a415fad701d22028398..49dbefdf8c2425838fd6eb711b2cc1aec667d0a0 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
  VERSION = 0.99
  PATCHLEVEL = 14
-ALPHA = q
+ALPHA = r
  
  all:   Version zImage
  
diff --git a/drivers/char/serial.c b/drivers/char/serial.c

index 0330087d76fc83deae726a1c4e11d180533eadcf..c25c17b02bab73a514bbd351b38ea54841c11186 100644 (file)
--- a/drivers/char/serial.c
+++ b/drivers/char/serial.c
@@ -1574,7 +1574,7 @@ static int block_til_ready(struct tty_struct *tty, struct file * filp,
          */
         if (info->flags & ASYNC_CLOSING) {
                 interruptible_sleep_on(&info->close_wait);
-               return -ERESTARTSYS;
+               return -EAGAIN;
         }
  
         /*
@@ -1632,7 +1632,7 @@ static int block_til_ready(struct tty_struct *tty, struct file * filp,
                 current->state = TASK_INTERRUPTIBLE;
                 if (tty_hung_up_p(filp) ||
                     !(info->flags & ASYNC_INITIALIZED)) {
-                       retval = -ERESTARTSYS;
+                       retval = -EAGAIN;
                         break;
                 }
                 if (!(info->flags & ASYNC_CALLOUT_ACTIVE) &&
diff --git a/include/linux/mman.h b/include/linux/mman.h

index 18e253980c6628f066cef9934154e3d7ed3d8704..4b42f737f08a604490d79f5fc7b6d6b3ee4f09c4 100644 (file)
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -10,5 +10,6 @@
  #define MAP_PRIVATE      2         /* Changes are private */
  #define MAP_TYPE         0xf       /* Mask for type of mapping */
  #define MAP_FIXED        0x10      /* Interpret addr exactly */
+#define MAP_ANONYMOUS    0x20      /* don't use a file */
  
  #endif /* _LINUX_MMAN_H */
diff --git a/mm/mmap.c b/mm/mmap.c

index 10e24dfc6d7784f8cb3cad34ca4553bdb3d27da0..5fea243bd37661bfc27e90ccb772ff265a3b7858 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -137,17 +137,20 @@ int do_mmap(struct file * file, unsigned long addr, unsigned long len,
  asmlinkage int sys_mmap(unsigned long *buffer)
  {
         int error;
-       unsigned long fd;
-       struct file * file;
+       unsigned long flags;
+       struct file * file = NULL;
  
         error = verify_area(VERIFY_READ, buffer, 6*4);
         if (error)
                 return error;
-       fd = get_fs_long(buffer+4);
-       if (fd >= NR_OPEN || !(file = current->filp[fd]))
-               return -EBADF;
+       flags = get_fs_long(buffer+3);
+       if (!(flags & MAP_ANONYMOUS)) {
+               unsigned long fd = get_fs_long(buffer+4);
+               if (fd >= NR_OPEN || !(file = current->filp[fd]))
+                       return -EBADF;
+       }
         return do_mmap(file, get_fs_long(buffer), get_fs_long(buffer+1),
-               get_fs_long(buffer+2), get_fs_long(buffer+3), get_fs_long(buffer+5));
+               get_fs_long(buffer+2), flags, get_fs_long(buffer+5));
  }
  
  /*
diff --git a/mm/vmalloc.c b/mm/vmalloc.c

index 6cbaa15ff2a9bebfeeefb70531ff07b313f5a660..99845dd4406931ae7dba6eaeb0ef7d0c2fe5921b 100644 (file)
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -158,7 +158,7 @@ void * vmalloc(unsigned long size)
         area->size = size + PAGE_SIZE;
         area->next = NULL;
         for (p = &vmlist; (tmp = *p) ; p = &tmp->next) {
-               if (size + (unsigned long) addr <= (unsigned long) tmp->addr)
+               if (size + (unsigned long) addr < (unsigned long) tmp->addr)
                         break;
                 addr = (void *) (tmp->size + (unsigned long) tmp->addr);
         }
diff --git a/net/inet/ip.c b/net/inet/ip.c

index 761f5f128533deeab26e030a70214eca639dda7a..b31ee4c11b9640cde5e273c8e8389e3ea66c6caf 100644 (file)
--- a/net/inet/ip.c
+++ b/net/inet/ip.c
@@ -800,6 +800,7 @@ static struct sk_buff *ip_glue(struct ipq *qp)
         iph = skb->h.iph;
         iph->frag_off = 0;
         iph->tot_len = htons((iph->ihl * sizeof(unsigned long)) + count);
+       skb->ip_hdr = iph;
         return(skb);
  }
   
diff --git a/net/inet/tcp.c b/net/inet/tcp.c

index 15d2d1355d0dd9ba7377c2a513af0a024b885de9..f24226a805988d3cb4038d4684077c702a4f84f4 100644 (file)
--- a/net/inet/tcp.c
+++ b/net/inet/tcp.c
@@ -208,8 +208,15 @@ tcp_retransmit(struct sock *sk, int all)
         return;
    }
  
-  if (sk->cong_window > 4)
-       sk->cong_window = sk->cong_window / 2;
+/*
+ *  If we had the full V-J mechanism, this might be right.  But
+ *  for the moment we want simple slow start after error.
+ *
+ *  if (sk->cong_window > 4)
+ *       sk->cong_window = sk->cong_window / 2;
+ */
+ 
+  sk->cong_window = 1;
    sk->exp_growth = 0;
  
    /* Do the actuall retransmit. */
@@ -613,6 +620,7 @@ tcp_send_partial(struct sock *sk)
    
    skb->h.seq = sk->send_seq;
    if (after(sk->send_seq , sk->window_seq) ||
+      sk->retransmits ||
        sk->packets_out >= sk->cong_window) {
         DPRINTF((DBG_TCP, "sk->cong_window = %d, sk->packets_out = %d\n",
                                         sk->cong_window, sk->packets_out));
@@ -833,67 +841,75 @@ tcp_write(struct sock *sk, unsigned char *from,
  
         /* Now we need to check if we have a half built packet. */
         if (sk->send_tmp != NULL) {
-               /* If sk->mss has been changed this could cause problems. */
+               int hdrlen;
  
-               /* Add more stuff to the end of skb->len */
                 skb = sk->send_tmp;
+
+                        /* IP header + TCP header */
+               hdrlen = ((unsigned long)skb->h.th - (unsigned long)(skb+1))
+                        + sizeof(struct tcphdr);
+
+               /* If sk->mtu has been changed this could cause problems. */
+
+               /* Add more stuff to the end of skb->len */
                 if (!(flags & MSG_OOB)) {
-                       copy = min(sk->mss - skb->len + 128 +
-                                  prot->max_header, len);
-             
-               /* FIXME: this is really a bug. */
-               if (copy <= 0) {
-                       printk("TCP: **bug**: \"copy\" <= 0!!\n");
-                       copy = 0;
-               }
+                       copy = min(sk->mtu - (skb->len - hdrlen), len);
+                       /* FIXME: this is really a bug. */
+                       if (copy <= 0) {
+                         printk("TCP: **bug**: \"copy\" <= 0!!\n");
+                         copy = 0;
+                       }
           
-               memcpy_fromfs((unsigned char *)(skb+1) + skb->len, from, copy);
-               skb->len += copy;
-               from += copy;
-               copied += copy;
-               len -= copy;
-               sk->send_seq += copy;
-       }
-
-       if (skb->len -(unsigned long)skb->h.th +
-          (unsigned long)(skb+1) >= sk->mss ||(flags & MSG_OOB)) {
-               tcp_send_partial(sk);
+                       memcpy_fromfs((unsigned char *)(skb+1) + skb->len, 
+                                     from, copy);
+                       skb->len += copy;
+                       from += copy;
+                       copied += copy;
+                       len -= copy;
+                       sk->send_seq += copy;
+                     }
+
+               if ((skb->len - hdrlen) > sk->mtu || (flags & MSG_OOB)) {
+                 tcp_send_partial(sk);
+               }
+               continue;
         }
-       continue;
-  }
  
-  /*
-   * We also need to worry about the window.
-   * The smallest we will send is about 200 bytes.
-   * This is a bit sad for TCP/AMPR people running
-   * 196 byte windows! - FIXME
-   */
-  copy = min(sk->mtu, diff(sk->window_seq, sk->send_seq));
+       /*
+        * We also need to worry about the window.
+        * If window < 1/4 offered window, don't use it.  That's
+        *   silly window prevention.  What we actually do is 
+        *   use the whole MTU.  Since the results in the right
+        *   edge of the packet being outside the window, it will
+        *   be queued for later rather than sent.
+        */
  
-  /* FIXME: redundent check here. */
-  if (copy < 200 || copy > sk->mtu) copy = sk->mtu;
-  copy = min(copy, len);
+       copy = diff(sk->window_seq, sk->send_seq);
+       if (copy < (diff(sk->window_seq, sk->rcv_ack_seq) >> 2))
+         copy = sk->mtu;
+       copy = min(copy, sk->mtu);
+       copy = min(copy, len);
  
    /* We should really check the window here also. */
-  if (sk->packets_out && copy < sk->mss && !(flags & MSG_OOB)) {
+       if (sk->packets_out && copy < sk->mtu && !(flags & MSG_OOB)) {
         /* We will release the socket incase we sleep here. */
-       release_sock(sk);
-       skb = (struct sk_buff *) prot->wmalloc(sk,
-                       sk->mss + 128 + prot->max_header +
+         release_sock(sk);
+         skb = (struct sk_buff *) prot->wmalloc(sk,
+                       sk->mtu + 128 + prot->max_header +
                         sizeof(*skb), 0, GFP_KERNEL);
-       sk->inuse = 1;
-       sk->send_tmp = skb;
-       if (skb != NULL)
-               skb->mem_len = sk->mss + 128 + prot->max_header + sizeof(*skb);
+         sk->inuse = 1;
+         sk->send_tmp = skb;
+         if (skb != NULL)
+               skb->mem_len = sk->mtu + 128 + prot->max_header + sizeof(*skb);
         } else {
                 /* We will release the socket incase we sleep here. */
-               release_sock(sk);
-               skb = (struct sk_buff *) prot->wmalloc(sk,
-                               copy + prot->max_header +
-                               sizeof(*skb), 0, GFP_KERNEL);
-               sk->inuse = 1;
-               if (skb != NULL)
-                       skb->mem_len = copy+prot->max_header + sizeof(*skb);
+         release_sock(sk);
+         skb = (struct sk_buff *) prot->wmalloc(sk,
+                                                copy + prot->max_header +
+                                                sizeof(*skb), 0, GFP_KERNEL);
+         sk->inuse = 1;
+         if (skb != NULL)
+           skb->mem_len = copy+prot->max_header + sizeof(*skb);
         }
  
         /* If we didn't get any memory, we need to sleep. */
@@ -980,6 +996,7 @@ tcp_write(struct sock *sk, unsigned char *from,
  
         skb->h.seq = sk->send_seq;
         if (after(sk->send_seq , sk->window_seq) ||
+                 sk->retransmits ||
                   sk->packets_out >= sk->cong_window) {
                 DPRINTF((DBG_TCP, "sk->cong_window = %d, sk->packets_out = %d\n",
                                         sk->cong_window, sk->packets_out));
@@ -998,8 +1015,23 @@ tcp_write(struct sock *sk, unsigned char *from,
         }
    }
    sk->err = 0;
+
+/*
+ * In its original form, the following code implements
+ * Nagle's rule.  Everybody recommends it, and probably
+ * production code should use it.  But it slows down response enough
+ * that I dont' like it.  So as long as we're not bumping into 
+ * the window, I go ahead and send it.  If you want Nagle, just
+ * define USE_NAGLE  --C. Hedrick
+ */
+
    /* Avoid possible race on send_tmp - c/o Johannes Stille */
-  if(sk->send_tmp && !sk->packets_out)
+  if(sk->send_tmp && 
+     ((!sk->packets_out) 
+#ifndef USE_NAGLE
+      || before(sk->send_seq , sk->window_seq)
+#endif
+      ))
         tcp_send_partial(sk);
    /* -- */
    release_sock(sk);
@@ -1696,7 +1728,6 @@ tcp_options(struct sock *sk, struct tcphdr *th)
  {
    unsigned char *ptr;
    int length=(th->doff*4)-sizeof(struct tcphdr);
-  int mtuset=0;
      
    ptr = (unsigned char *)(th + 1);
    
@@ -1721,7 +1752,6 @@ tcp_options(struct sock *sk, struct tcphdr *th)
                                         if(opsize==4)
                                         {
                                                 sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr));
-                                               mtuset=1;
                                         }
                                         break;
                                 /* Add other options here as people feel the urge to implement stuff like large windows */
@@ -1730,12 +1760,7 @@ tcp_options(struct sock *sk, struct tcphdr *th)
                         length-=opsize;
         }
    }
-                                       
-  if (!mtuset) 
-  {
-       sk->mtu = min(sk->mtu, 576 - HEADER_SIZE);
-       return;
-  }
+
  }
  
  /*
@@ -1857,6 +1882,16 @@ tcp_conn_request(struct sock *sk, struct sk_buff *skb,
    newsk->ip_ttl=sk->ip_ttl;
    newsk->ip_tos=skb->ip_hdr->tos;
  
+/* use 512 or whatever user asked for */
+/* note use of sk->mss, since user has no direct access to newsk */
+  if (sk->mss)
+    newsk->mtu = sk->mss;
+  else
+    newsk->mtu = 576 - HEADER_SIZE;
+/* but not bigger than device MTU */
+  newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE);
+
+/* this will min with what arrived in the packet */
    tcp_options(newsk,skb->h.th);
  
    buff = (struct sk_buff *) newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
@@ -1916,8 +1951,8 @@ tcp_conn_request(struct sock *sk, struct sk_buff *skb,
    ptr =(unsigned char *)(t1+1);
    ptr[0] = 2;
    ptr[1] = 4;
-  ptr[2] =((dev->mtu - HEADER_SIZE) >> 8) & 0xff;
-  ptr[3] =(dev->mtu - HEADER_SIZE) & 0xff;
+  ptr[2] = ((newsk->mtu) >> 8) & 0xff;
+  ptr[3] =(newsk->mtu) & 0xff;
  
    tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk);
    newsk->prot->queue_xmit(newsk, dev, buff, 0);
@@ -2097,7 +2132,8 @@ tcp_write_xmit(struct sock *sk)
  
    while(sk->wfront != NULL &&
          before(sk->wfront->h.seq, sk->window_seq) &&
-        sk->packets_out < sk->cong_window) {
+       (sk->retransmits == 0 || before(sk->wfront->h.seq, sk->rcv_ack_seq +1))
+        && sk->packets_out < sk->cong_window) {
                 skb = sk->wfront;
                 IS_SKB(skb);
                 sk->wfront =(struct sk_buff *)skb->next;
@@ -2270,8 +2306,25 @@ tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
         if (before(sk->send_head->h.seq, ack+1)) {
                 struct sk_buff *oskb;
  
-               sk->retransmits = 0;
+               if (sk->retransmits) {
+
+                 /* if we're retransmitting, don't start any new
+                  * packets until after everything in retransmit queue
+                  * is acked.  That's as close as I can come at the
+                  * moment to slow start the way this code is organized
+                  */
+                 if (sk->send_head->link3)
+                   sk->retransmits = 1;
+                 else
+                   sk->retransmits = 0;
+               }
  
+               /*
+                * need to restart backoff whenever we get a response,
+                * or things get impossible if we lose a window-full of
+                * data with very small MSS
+                */
+               sk->backoff = 0;
                 /* We have one less packet out there. */
                 if (sk->packets_out > 0) sk->packets_out --;
                 DPRINTF((DBG_TCP, "skb=%X skb->h.seq = %d acked ack=%d\n",
@@ -2282,13 +2335,22 @@ tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
  
                 oskb = sk->send_head;
  
-               /* Estimate the RTT. Ignore the ones right after a retransmit. */
-               if (sk->retransmits == 0 && !(flag&2)) {
+               /* 
+                * In theory we're supposed to ignore rtt's when there's
+                * retransmission in process.  Unfortunately this means
+                * that if there's a sharp increase in RTT, we may 
+                * never get out of retransmission.  For the moment
+                * ignore the test.
+                */
+
+               if (/* sk->retransmits == 0 && */ !(flag&2)) {
                   long abserr, rtt = jiffies - oskb->when;
  
-                 if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV)
+                 if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) {
                     /* first ack, so nothing else to average with */
                     sk->rtt = rtt;
+                   sk->mdev = rtt; /* overcautious initial estimate */
+                 }
                   else {
                     abserr = (rtt > sk->rtt) ? rtt - sk->rtt : sk->rtt - rtt;
                     sk->rtt = (7 * sk->rtt + rtt) >> 3;
@@ -2328,7 +2390,9 @@ tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
     */
    if (sk->wfront != NULL) {
         if (after (sk->window_seq, sk->wfront->h.seq) &&
-               sk->packets_out < sk->cong_window) {
+               (sk->retransmits == 0 || 
+                before(sk->wfront->h.seq, sk->rcv_ack_seq +1))
+               && sk->packets_out < sk->cong_window) {
                 flag |= 1;
                 tcp_write_xmit(sk);
         }
@@ -2390,7 +2454,7 @@ tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
        (sk->send_head->when + backoff(sk->backoff) * (2 * sk->mdev + sk->rtt)
         < jiffies)) {
         sk->exp_growth = 0;
-       ip_retransmit(sk, 0);
+       ip_retransmit(sk, 1);
    }
  
    DPRINTF((DBG_TCP, "leaving tcp_ack\n"));
@@ -2855,13 +2919,20 @@ tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
    t1->urg_ptr = 0;
    t1->doff = 6;
  
+/* use 512 or whatever user asked for */
+  if (sk->mss)
+    sk->mtu = sk->mss;
+  else
+    sk->mtu = 576 - HEADER_SIZE;
+/* but not bigger than device MTU */
+  sk->mtu = min(sk->mtu, dev->mtu - HEADER_SIZE);
+
    /* Put in the TCP options to say MTU. */
    ptr = (unsigned char *)(t1+1);
    ptr[0] = 2;
    ptr[1] = 4;
-  ptr[2] = (dev->mtu- HEADER_SIZE) >> 8;
-  ptr[3] = (dev->mtu- HEADER_SIZE) & 0xff;
-  sk->mtu = dev->mtu - HEADER_SIZE;
+  ptr[2] = (sk->mtu) >> 8;
+  ptr[3] = (sk->mtu) & 0xff;
    tcp_send_check(t1, sk->saddr, sk->daddr,
                   sizeof(struct tcphdr) + 4, sk);
author	Linus Torvalds <torvalds@linuxfoundation.org>
	Fri, 23 Nov 2007 20:09:19 +0000 (15:09 -0500)
committer	Linus Torvalds <torvalds@linuxfoundation.org>
	Fri, 23 Nov 2007 20:09:19 +0000 (15:09 -0500)
Makefile		patch \| blob \| history
drivers/char/serial.c		patch \| blob \| history
include/linux/mman.h		patch \| blob \| history
mm/mmap.c		patch \| blob \| history
mm/vmalloc.c		patch \| blob \| history
net/inet/ip.c		patch \| blob \| history
net/inet/tcp.c		patch \| blob \| history