From 5892de9e7bf6cbd500ad0905cb2b2bbc1139608b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 23 Nov 2007 15:15:22 -0500 Subject: [PATCH] Linux 2.1.100pre1 pre-100 (on ftp.kernel.org now), moves the dcache shrinking into the regular memory de-allocation loop, and while the exact shrinking speed is probably completely off, it should be able to react much better to small-memory machines than the hardcoded shrink did.. Also, for those that appear to still have SMP interrupt stability problems, Ingo pointed out that we may have problems with PCI level-triggered interrupts. Could those people please test an additional small patch that involves moving the "ack_APIC_irq();" inside arch/i386/kernel/irq.c: do_ioapic_IRQ() from the top of the function to the very bottom of that function (that will move it to outside the irq controller lock, but it should actually be perfectly ok in this case). Linus --- Documentation/filesystems/vfs.txt | 11 ++- Makefile | 2 +- drivers/net/sdla_fr.c | 10 +- drivers/net/sdla_x25.c | 4 +- drivers/net/sdlamain.c | 4 +- drivers/net/smc-mca.c | 2 +- drivers/scsi/scsi_ioctl.c | 60 ++++++++---- drivers/scsi/sd.c | 9 +- fs/dcache.c | 9 +- fs/namei.c | 21 ++-- include/linux/icmpv6.h | 5 +- include/linux/ipv6.h | 22 ++++- include/net/ipv6.h | 8 +- include/net/ndisc.h | 2 + include/net/tcp.h | 2 +- include/scsi/scsi_ioctl.h | 2 + mm/vmscan.c | 14 +-- net/core/dev.c | 6 ++ net/ethernet/pe2.c | 3 +- net/ipv4/ip_fw.c | 153 +++++++++++++++++------------ net/ipv4/tcp_input.c | 53 +--------- net/ipv4/tcp_ipv4.c | 38 ++++++-- net/ipv4/tcp_timer.c | 12 +-- net/ipv6/exthdrs.c | 81 +++++++++++++++- net/ipv6/icmp.c | 156 +++++++++++++++++++----------- net/ipv6/ip6_input.c | 8 +- net/ipv6/ipv6_sockglue.c | 4 +- net/ipv6/mcast.c | 10 +- net/ipv6/ndisc.c | 3 - net/ipv6/reassembly.c | 79 +++++++++++---- net/ipv6/tcp_ipv6.c | 23 +++-- net/netsyms.c | 2 +- 32 files changed, 540 insertions(+), 278 deletions(-) diff --git a/Documentation/filesystems/vfs.txt 
b/Documentation/filesystems/vfs.txt index 7f75f4770de1..9dfe8dc27c31 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -10,7 +10,6 @@ pages of code to determine what is expected when writing a filesystem. Hopefully this helps anyone attempting such a feat, as well as clearing up a few important points/dependencies. - register_filesystem (struct file_system_type *fstype) ===================================================== @@ -133,10 +132,12 @@ struct inode_operations int (*follow_link) (struct inode *,struct inode *,int,int,struct inode **); [optional] - The follow_link function is only necessary if a filesystem uses a really - twisted form of symbolic links - namely if the symbolic link comes from a - foreign filesystem that makes no sense.... - I threw this one out - too much redundant code! + follow_link must be implemented if readlink is implemented. + Note that follow_link can return a different inode than a + lookup_dentry() on the result of readlink() would return. + The proc filesystem, in particular, uses this feature heavily. + For most user filesystems, however, follow_link() and readlink() + should return consistent results. 
int (*readpage) (struct inode *, struct page *); [optional] int (*writepage) (struct inode *, struct page *); [mandatory with readpage] diff --git a/Makefile b/Makefile index 1586e862d0f0..ebbc501a129d 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 2 PATCHLEVEL = 1 -SUBLEVEL = 99 +SUBLEVEL = 100 ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/) diff --git a/drivers/net/sdla_fr.c b/drivers/net/sdla_fr.c index 79bd5d4ca0a2..36b74b90e3ca 100644 --- a/drivers/net/sdla_fr.c +++ b/drivers/net/sdla_fr.c @@ -831,7 +831,7 @@ static int if_header(struct sk_buff *skb, struct device *dev, { int hdr_len = 0; skb->protocol = type; - hdr_len = wan_encapsulate(skb, dev); + hdr_len = wanrouter_encapsulate(skb, dev); if (hdr_len < 0) { hdr_len = 0; @@ -1486,7 +1486,7 @@ static void fr502_rx_intr(sdla_t * card) /* Decapsulate packet and pass it up the protocol stack */ skb->dev = dev; buf = skb_pull(skb, 1); /* remove hardware header */ - if (!wan_type_trans(skb, dev)) + if (!wanrouter_type_trans(skb, dev)) { /* can't decapsulate packet */ dev_kfree_skb(skb); @@ -1601,7 +1601,7 @@ static void fr508_rx_intr(sdla_t * card) skb->dev = dev; /* remove hardware header */ buf = skb_pull(skb, 1); - if (!wan_type_trans(skb, dev)) + if (!wanrouter_type_trans(skb, dev)) { /* can't decapsulate packet */ dev_kfree_skb(skb); @@ -2746,7 +2746,7 @@ static int process_udp_mgmt_pkt(char udp_pkt_src, sdla_t * card, struct sk_buff stack */ new_skb->dev = dev; buf = skb_pull(new_skb, 1); /* remove hardware header */ - if (!wan_type_trans(new_skb, dev)) + if (!wanrouter_type_trans(new_skb, dev)) { ++chan->UDP_FPIPE_mgmt_not_passed_to_stack; /* can't decapsulate packet */ @@ -2944,7 +2944,7 @@ static int process_udp_driver_call(char udp_pkt_src, sdla_t * card, struct sk_bu new_skb->dev = dev; /* remove hardware header */ buf = skb_pull(new_skb, 1); - if (!wan_type_trans(new_skb, dev)) + if (!wanrouter_type_trans(new_skb, dev)) { /* can't decapsulate packet */ 
++chan->UDP_DRVSTATS_mgmt_not_passed_to_stack; diff --git a/drivers/net/sdla_x25.c b/drivers/net/sdla_x25.c index 6a1759040bc3..af7d318443bf 100644 --- a/drivers/net/sdla_x25.c +++ b/drivers/net/sdla_x25.c @@ -649,7 +649,7 @@ static int if_header (struct sk_buff* skb, struct device* dev, skb->protocol = type; if (!chan->protocol) { - hdr_len = wan_encapsulate(skb, dev); + hdr_len = wanrouter_encapsulate(skb, dev); if (hdr_len < 0) { hdr_len = 0; @@ -999,7 +999,7 @@ static void rx_intr (sdla_t* card) chan->rx_skb = NULL; /* dequeue packet */ /* Decapsulate packet, if necessary */ - if (!skb->protocol && !wan_type_trans(skb, dev)) + if (!skb->protocol && !wanrouter_type_trans(skb, dev)) { /* can't decapsulate packet */ dev_kfree_skb(skb); diff --git a/drivers/net/sdlamain.c b/drivers/net/sdlamain.c index eed8c9f366c6..44fabaecb2cf 100644 --- a/drivers/net/sdlamain.c +++ b/drivers/net/sdlamain.c @@ -152,7 +152,7 @@ int init_module (void) wandev->setup = &setup; wandev->shutdown = &shutdown; wandev->ioctl = &ioctl; - err = register_wandev(wandev); + err = register_wan_device(wandev); if (err) { printk(KERN_ERR @@ -179,7 +179,7 @@ void cleanup_module (void) for (i = 0; i < ncards; ++i) { sdla_t* card = &card_array[i]; - unregister_wandev(card->devname); + unregister_wan_device(card->devname); } kfree(card_array); } diff --git a/drivers/net/smc-mca.c b/drivers/net/smc-mca.c index 79d6d8076853..152beacbc930 100644 --- a/drivers/net/smc-mca.c +++ b/drivers/net/smc-mca.c @@ -337,7 +337,7 @@ int init_module(void) dev->name = namelist+(NAMELEN*this_dev); dev->irq = irq[this_dev]; dev->base_addr = io[this_dev]; - dev->init = ultra_probe; + dev->init = ultramca_probe; if (io[this_dev] == 0) { if (this_dev != 0) diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c index a052caadfe9b..e2f933fb3c5d 100644 --- a/drivers/scsi/scsi_ioctl.c +++ b/drivers/scsi/scsi_ioctl.c @@ -17,8 +17,13 @@ #include "hosts.h" #include -#define MAX_RETRIES 5 -#define MAX_TIMEOUT (9 * HZ) 
+#define NORMAL_RETRIES 5 +#define NORMAL_TIMEOUT (10 * HZ) +#define FORMAT_UNIT_TIMEOUT (2 * 60 * 60 * HZ) +#define START_STOP_TIMEOUT (60 * HZ) +#define MOVE_MEDIUM_TIMEOUT (5 * 60 * HZ) +#define READ_ELEMENT_STATUS_TIMEOUT (5 * 60 * HZ) + #define MAX_BUF PAGE_SIZE #define max(a,b) (((a) > (b)) ? (a) : (b)) @@ -61,7 +66,7 @@ static int ioctl_probe(struct Scsi_Host * host, void *buffer) /* * * The SCSI_IOCTL_SEND_COMMAND ioctl sends a command out to the SCSI host. - * The MAX_TIMEOUT and MAX_RETRIES variables are used. + * The NORMAL_TIMEOUT and NORMAL_RETRIES variables are used. * * dev is the SCSI device struct ptr, *(int *) arg is the length of the * input data, if any, not including the command string & counts, @@ -94,7 +99,8 @@ static void scsi_ioctl_done (Scsi_Cmnd * SCpnt) } } -static int ioctl_internal_command(Scsi_Device *dev, char * cmd) +static int ioctl_internal_command(Scsi_Device *dev, char * cmd, + int timeout, int retries) { unsigned long flags; int result; @@ -107,9 +113,7 @@ static int ioctl_internal_command(Scsi_Device *dev, char * cmd) struct semaphore sem = MUTEX_LOCKED; SCpnt->request.sem = &sem; spin_lock_irqsave(&io_request_lock, flags); - scsi_do_cmd(SCpnt, cmd, NULL, 0, - scsi_ioctl_done, MAX_TIMEOUT, - MAX_RETRIES); + scsi_do_cmd(SCpnt, cmd, NULL, 0, scsi_ioctl_done, timeout, retries); spin_unlock_irqrestore(&io_request_lock, flags); down(&sem); SCpnt->request.sem = NULL; @@ -250,21 +254,24 @@ int scsi_ioctl_send_command(Scsi_Device *dev, Scsi_Ioctl_Command *sic) switch (opcode) { case FORMAT_UNIT: - timeout = 2 * 60 * 60 * HZ; /* 2 Hours */ + timeout = FORMAT_UNIT_TIMEOUT; retries = 1; break; case START_STOP: - timeout = 2 * 60 * HZ; /* 2 minutes */ - retries = 1; + timeout = START_STOP_TIMEOUT; + retries = NORMAL_RETRIES; break; case MOVE_MEDIUM: + timeout = MOVE_MEDIUM_TIMEOUT; + retries = NORMAL_RETRIES; + break; case READ_ELEMENT_STATUS: - timeout = 5 * 60 * HZ; /* 5 minutes */ - retries = 1; + timeout = READ_ELEMENT_STATUS_TIMEOUT; 
+ retries = NORMAL_RETRIES; break; default: - timeout = MAX_TIMEOUT; - retries = MAX_RETRIES; + timeout = NORMAL_TIMEOUT; + retries = NORMAL_RETRIES; break; } @@ -395,7 +402,8 @@ int scsi_ioctl (Scsi_Device *dev, int cmd, void *arg) scsi_cmd[1] = dev->lun << 5; scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0; scsi_cmd[4] = SCSI_REMOVAL_PREVENT; - return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd); + return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd, + NORMAL_TIMEOUT, NORMAL_RETRIES); break; case SCSI_IOCTL_DOORUNLOCK: if (!dev->removable || !dev->lockable) return 0; @@ -403,13 +411,31 @@ int scsi_ioctl (Scsi_Device *dev, int cmd, void *arg) scsi_cmd[1] = dev->lun << 5; scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0; scsi_cmd[4] = SCSI_REMOVAL_ALLOW; - return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd); + return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd, + NORMAL_TIMEOUT, NORMAL_RETRIES); case SCSI_IOCTL_TEST_UNIT_READY: scsi_cmd[0] = TEST_UNIT_READY; scsi_cmd[1] = dev->lun << 5; scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0; scsi_cmd[4] = 0; - return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd); + return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd, + NORMAL_TIMEOUT, NORMAL_RETRIES); + break; + case SCSI_IOCTL_START_UNIT: + scsi_cmd[0] = START_STOP; + scsi_cmd[1] = dev->lun << 5; + scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0; + scsi_cmd[4] = 1; + return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd, + START_STOP_TIMEOUT, NORMAL_RETRIES); + break; + case SCSI_IOCTL_STOP_UNIT: + scsi_cmd[0] = START_STOP; + scsi_cmd[1] = dev->lun << 5; + scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0; + scsi_cmd[4] = 0; + return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd, + START_STOP_TIMEOUT, NORMAL_RETRIES); break; default : if (dev->host->hostt->ioctl) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index e7da5c95414a..ac752b2e5799 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1060,8 +1060,15 @@ static 
int check_scsidisk_media_change(kdev_t full_dev){ } inode.i_rdev = full_dev; /* This is all we really need here */ - retval = sd_ioctl(&inode, NULL, SCSI_IOCTL_TEST_UNIT_READY, 0); + /* Using Start/Stop enables differentiation between drive with + * no cartridge loaded - NOT READY, drive with changed cartridge - + * UNIT ATTENTION, or with same cartridge - GOOD STATUS. + * This also handles drives that auto spin down. eg iomega jaz 1GB + * as this will spin up the drive. + */ + retval = sd_ioctl(&inode, NULL, SCSI_IOCTL_START_UNIT, 0); + if(retval){ /* Unable to test, unit probably not ready. This usually * means there is no disc in the drive. Mark as changed, * and we will figure it out later once the drive is diff --git a/fs/dcache.c b/fs/dcache.c index 58c6479c91b0..0ef962687b27 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -430,12 +430,17 @@ void shrink_dcache_parent(struct dentry * parent) * more memory, but aren't really sure how much. So we * carefully try to free a _bit_ of our dcache, but not * too much. + * + * Priority: + * 0 - very urgent: shrink everything + * ... + * 6 - base-level: try to shrink a bit. */ -void shrink_dcache_memory(void) +void shrink_dcache_memory(int priority, unsigned int gfp_mask) { int count = select_dcache(32, 8); if (count) - prune_dcache(count); + prune_dcache((count << 6) >> priority); } #define NAME_ALLOC_LEN(len) ((len+16) & ~15) diff --git a/fs/namei.c b/fs/namei.c index a6de99eadfa3..06257ab99c63 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -43,17 +43,24 @@ * * The new code replaces the old recursive symlink resolution with * an iterative one (in case of non-nested symlink chains). It does - * this by looking up the symlink name from the particular filesystem, - * and then follows this name as if it were a user-supplied one. This - * is done solely in the VFS level, such that _follow_link() is not - * used any more and could be removed in future. 
As a side effect, - * dir_namei(), _namei() and follow_link() are now replaced with a single - * function lookup_dentry() that can handle all the special cases of the former - * code. + * this with calls to _follow_link(). + * As a side effect, dir_namei(), _namei() and follow_link() are now + * replaced with a single function lookup_dentry() that can handle all + * the special cases of the former code. * * With the new dcache, the pathname is stored at each inode, at least as * long as the refcount of the inode is positive. As a side effect, the * size of the dcache depends on the inode cache and thus is dynamic. + * + * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink + * resolution to correspond with current state of the code. + * + * Note that the symlink resolution is not *completely* iterative. + * There is still a significant amount of tail- and mid- recursion in + * the algorithm. Also, note that _readlink() is not used in + * lookup_dentry(): lookup_dentry() on the result of _readlink() + * may return different results than _follow_link(). Many virtual + * filesystems (including /proc) exhibit this behavior. */ /* [24-Feb-97 T. 
Schoebel-Theuer] Side effects caused by new implementation: diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index a582e37b58bf..fcd6fce28b89 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -138,9 +138,12 @@ extern void icmpv6_send(struct sk_buff *skb, __u32 info, struct device *dev); -extern void icmpv6_init(struct net_proto_family *ops); +extern int icmpv6_init(struct net_proto_family *ops); extern int icmpv6_err_convert(int type, int code, int *err); +extern void icmpv6_cleanup(void); +extern void icmpv6_param_prob(struct sk_buff *skb, + int code, void *pos); #endif #endif diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 478c8503c83a..3913524180d1 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -42,6 +42,24 @@ struct ipv6_rt_hdr { */ }; + +struct ipv6_opt_hdr { + __u8 nexthdr; + __u8 hdrlen; + /* + * TLV encoded option data follows. + */ +}; + +#define ipv6_destopt_hdr ipv6_opt_hdr +#define ipv6_hopopt_hdr ipv6_opt_hdr + +#ifdef __KERNEL__ +#define ipv6_optlen(p) (((p)->hdrlen+1) << 3) +#endif + + + /* * routing header type 0 (used in cmsghdr struct) */ @@ -95,13 +113,13 @@ struct ipv6_options /* * protocol options - * usualy carried in IPv6 extension headers + * usually carried in IPv6 extension headers */ struct ipv6_rt_hdr *srcrt; /* Routing Header */ - }; + #endif #endif diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 1a322a49813f..c3d2d58954ad 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -4,7 +4,7 @@ * Authors: * Pedro Roque * - * $Id: ipv6.h,v 1.9 1998/03/08 05:55:20 davem Exp $ + * $Id: ipv6.h,v 1.10 1998/04/30 16:24:14 freitag Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -108,6 +108,8 @@ struct frag_queue { struct frag_queue *prev; __u32 id; /* fragment id */ + struct in6_addr saddr; + struct in6_addr daddr; struct timer_list timer; /* expire timer */ struct ipv6_frag 
*fragments; struct device *dev; @@ -248,6 +250,10 @@ extern int ipv6opt_srt_tosin(struct ipv6_options *opt, extern void ipv6opt_free(struct ipv6_options *opt); +extern struct ipv6_opt_hdr * ipv6_skip_exthdr(struct ipv6_opt_hdr *hdr, + u8 *nexthdrp, int len); + + /* * socket options (ipv6_sockglue.c) diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 754253811e08..adaf76ef3266 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -101,6 +101,8 @@ extern int igmp6_event_report(struct sk_buff *skb, struct icmp6hdr *hdr, int len); +extern void igmp6_cleanup(void); + extern __inline__ struct neighbour * ndisc_get_neigh(struct device *dev, struct in6_addr *addr) { diff --git a/include/net/tcp.h b/include/net/tcp.h index 52853f44a5c1..ca1240b8acaf 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -642,7 +642,7 @@ static __inline__ unsigned int tcp_current_mss(struct sock *sk) /* PMTU discovery event has occurred. */ sk->mtu = dst->pmtu; - sk->mss = sk->mtu - mss_distance; + mss_now = sk->mss = sk->mtu - mss_distance; } if(tp->sack_ok && tp->num_sacks) diff --git a/include/scsi/scsi_ioctl.h b/include/scsi/scsi_ioctl.h index 97e20a98a5ed..6ba0dd542f93 100644 --- a/include/scsi/scsi_ioctl.h +++ b/include/scsi/scsi_ioctl.h @@ -5,6 +5,8 @@ #define SCSI_IOCTL_TEST_UNIT_READY 2 #define SCSI_IOCTL_BENCHMARK_COMMAND 3 #define SCSI_IOCTL_SYNC 4 /* Request synchronous parameters */ +#define SCSI_IOCTL_START_UNIT 5 +#define SCSI_IOCTL_STOP_UNIT 6 /* The door lock/unlock constants are compatible with Sun constants for the cdrom */ #define SCSI_IOCTL_DOORLOCK 0x5380 /* lock the eject mechanism */ diff --git a/mm/vmscan.c b/mm/vmscan.c index 8eaeb23d59e5..9644f6d8c39c 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -455,17 +455,20 @@ static inline int do_try_to_free_page(int gfp_mask) switch (state) { do { case 0: - state = 1; if (shrink_mmap(i, gfp_mask)) return 1; + state = 1; case 1: - state = 2; if ((gfp_mask & __GFP_IO) && shm_swap(i, gfp_mask)) return 1; - 
default: - state = 0; + state = 2; + case 2: if (swap_out(i, gfp_mask)) return 1; + state = 3; + case 3: + shrink_dcache_memory(i, gfp_mask); + state = 0; i--; } while ((i - stop) >= 0); } @@ -545,9 +548,6 @@ int kswapd(void *unused) schedule(); swapstats.wakeups++; - /* This will gently shrink the dcache.. */ - shrink_dcache_memory(); - /* * Do the background pageout: be * more aggressive if we're really diff --git a/net/core/dev.c b/net/core/dev.c index 85312b12c0e6..1bb5dede15e2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1764,6 +1764,9 @@ extern int baycom_init(void); extern int lapbeth_init(void); extern void arcnet_init(void); extern void ip_auto_config(void); +#ifdef CONFIG_8xx +extern int cpm_enet_init(void); +#endif /* CONFIG_8xx */ #ifdef CONFIG_PROC_FS static struct proc_dir_entry proc_net_dev = { @@ -1845,6 +1848,9 @@ __initfunc(int net_dev_init(void)) #endif #if defined(CONFIG_ARCNET) arcnet_init(); +#endif +#if defined(CONFIG_8xx) + cpm_enet_init(); #endif /* * SLHC if present needs attaching so other people see it diff --git a/net/ethernet/pe2.c b/net/ethernet/pe2.c index 812d35864399..4915f07072e1 100644 --- a/net/ethernet/pe2.c +++ b/net/ethernet/pe2.c @@ -11,7 +11,8 @@ pEII_datalink_header(struct datalink_proto *dl, struct device *dev = skb->dev; skb->protocol = htons (ETH_P_IPX); - dev->hard_header(skb, dev, ETH_P_IPX, dest_node, NULL, skb->len); + if(dev->hard_header) + dev->hard_header(skb, dev, ETH_P_IPX, dest_node, NULL, skb->len); } struct datalink_proto * diff --git a/net/ipv4/ip_fw.c b/net/ipv4/ip_fw.c index 28ad5bc334bc..745d07cb4c7b 100644 --- a/net/ipv4/ip_fw.c +++ b/net/ipv4/ip_fw.c @@ -6,7 +6,7 @@ * license in recognition of the original copyright. * -- Alan Cox. * - * $Id: ip_fw.c,v 1.34 1998/03/20 09:12:06 davem Exp $ + * $Id: ip_fw.c,v 1.35 1998/04/30 16:29:51 freitag Exp $ * * Ported from BSD to Linux, * Alan Cox 22/Nov/1994. 
@@ -62,6 +62,7 @@ * Wouter Gadeyne : Fixed masquerading support of ftp PORT commands * * Juan Jose Ciarlante : Masquerading code moved to ip_masq.c + * Andi Kleen : Print frag_offsets and the ip flags properly. * * All the real work was done by ..... * @@ -202,6 +203,90 @@ extern inline int port_match(unsigned short *portptr,int nports,unsigned short p #if defined(CONFIG_IP_ACCT) || defined(CONFIG_IP_FIREWALL) +#ifdef CONFIG_IP_FIREWALL_VERBOSE + +/* + * VERY ugly piece of code which actually makes kernel printf for + * matching packets. + */ + +static char *chain_name(struct ip_fw *chain, int mode) +{ + switch (mode) { + case IP_FW_MODE_ACCT_IN: return "acct in"; + case IP_FW_MODE_ACCT_OUT: return "acct out"; + default: + if (chain == ip_fw_fwd_chain) + return "fw-fwd"; + else if (chain == ip_fw_in_chain) + return "fw-in"; + else + return "fw-out"; + } +} + +static char *rule_name(struct ip_fw *f, int mode, char *buf) +{ + if (mode == IP_FW_MODE_ACCT_IN || mode == IP_FW_MODE_ACCT_OUT) + return ""; + + if(f->fw_flg&IP_FW_F_ACCEPT) { + if(f->fw_flg&IP_FW_F_REDIR) { + sprintf(buf, "acc/r%d ", f->fw_pts[f->fw_nsp+f->fw_ndp]); + return buf; + } else if(f->fw_flg&IP_FW_F_MASQ) + return "acc/masq "; + else + return "acc "; + } else if(f->fw_flg&IP_FW_F_ICMPRPL) { + return "rej "; + } else { + return "deny "; + } +} + +static void print_packet(struct iphdr *ip, + u16 src_port, u16 dst_port, u16 icmp_type, + char *chain, char *rule, char *devname) +{ + __u32 *opt = (__u32 *) (ip + 1); + int opti; + __u16 foff = ntohs(ip->frag_off); + + printk(KERN_INFO "IP %s %s%s", chain, rule, devname); + + switch(ip->protocol) + { + case IPPROTO_TCP: + printk(" TCP "); + break; + case IPPROTO_UDP: + printk(" UDP "); + break; + case IPPROTO_ICMP: + printk(" ICMP/%d ", icmp_type); + break; + default: + printk(" PROTO=%d ", ip->protocol); + break; + } + print_ip(ip->saddr); + if(ip->protocol == IPPROTO_TCP || ip->protocol == IPPROTO_UDP) + printk(":%hu", src_port); + printk(" "); + 
print_ip(ip->daddr); + if(ip->protocol == IPPROTO_TCP || ip->protocol == IPPROTO_UDP) + printk(":%hu", dst_port); + printk(" L=%hu S=0x%2.2hX I=%hu FO=0x%4.4hX T=%hu", + ntohs(ip->tot_len), ip->tos, ntohs(ip->id), + foff & IP_OFFSET, ip->ttl); + if (foff & IP_DF) printk(" DF=1"); + if (foff & IP_MF) printk(" MF=1"); + for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++) + printk(" O=0x%8.8X", *opt++); + printk("\n"); +} +#endif /* * Returns one of the generic firewall policies, like FW_ACCEPT. @@ -483,68 +568,14 @@ int ip_fw_chk(struct iphdr *ip, struct device *rif, __u16 *redirport, struct ip_ } #ifdef CONFIG_IP_FIREWALL_VERBOSE - /* - * VERY ugly piece of code which actually - * makes kernel printf for matching packets... - */ - if (f->fw_flg & IP_FW_F_PRN) { - __u32 *opt = (__u32 *) (ip + 1); - int opti; - - if(mode == IP_FW_MODE_ACCT_IN) - printk(KERN_INFO "IP acct in "); - else if(mode == IP_FW_MODE_ACCT_OUT) - printk(KERN_INFO "IP acct out "); - else { - if(chain == ip_fw_fwd_chain) - printk(KERN_INFO "IP fw-fwd "); - else if(chain == ip_fw_in_chain) - printk(KERN_INFO "IP fw-in "); - else - printk(KERN_INFO "IP fw-out "); - if(f->fw_flg&IP_FW_F_ACCEPT) { - if(f->fw_flg&IP_FW_F_REDIR) - printk("acc/r%d ", f->fw_pts[f->fw_nsp+f->fw_ndp]); - else if(f->fw_flg&IP_FW_F_MASQ) - printk("acc/masq "); - else - printk("acc "); - } else if(f->fw_flg&IP_FW_F_ICMPRPL) - printk("rej "); - else - printk("deny "); - } - printk(rif ? 
rif->name : "-"); - switch(ip->protocol) - { - case IPPROTO_TCP: - printk(" TCP "); - break; - case IPPROTO_UDP: - printk(" UDP "); - break; - case IPPROTO_ICMP: - printk(" ICMP/%d ", icmp_type); - break; - default: - printk(" PROTO=%d ", ip->protocol); - break; - } - print_ip(ip->saddr); - if(ip->protocol == IPPROTO_TCP || ip->protocol == IPPROTO_UDP) - printk(":%hu", src_port); - printk(" "); - print_ip(ip->daddr); - if(ip->protocol == IPPROTO_TCP || ip->protocol == IPPROTO_UDP) - printk(":%hu", dst_port); - printk(" L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu", - ntohs(ip->tot_len), ip->tos, ntohs(ip->id), - ip->frag_off, ip->ttl); - for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++) - printk(" O=0x%8.8X", *opt++); - printk("\n"); + char buf[16]; + + print_packet(ip, src_port, dst_port, icmp_type, + chain_name(chain, mode), + rule_name(f, mode, buf), + rif ? rif->name : "-"); } #endif if (mode != IP_FW_MODE_CHK) { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d5b0b15c60ae..7a0a40aeb57f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_input.c,v 1.114 1998/04/28 06:42:22 davem Exp $ + * Version: $Id: tcp_input.c,v 1.116 1998/05/02 14:50:11 davem Exp $ * * Authors: Ross Biro, * Fred N. van Kempen, @@ -421,33 +421,6 @@ static __inline__ int tcp_fast_parse_options(struct sock *sk, struct tcphdr *th, return 1; } -#if 0 /* Not working yet... 
-DaveM */ -static void tcp_compute_tsack(struct sock *sk, struct tcp_opt *tp) -{ - struct sk_buff *skb = skb_peek(&sk->write_queue); - __u32 tstamp = tp->rcv_tsecr; - int fack_count = 0; - - while((skb != NULL) && - (skb != tp->send_head) && - (skb != (struct sk_buff *)&sk->write_queue)) { - if(TCP_SKB_CB(skb)->when == tstamp) { - __u8 sacked = TCP_SKB_CB(skb)->sacked; - - sacked |= TCPCB_SACKED_ACKED; - if(sacked & TCPCB_SACKED_RETRANS) - tp->retrans_out--; - TCP_SKB_CB(skb)->sacked = sacked; - } - if(!before(TCP_SKB_CB(skb)->when, tstamp)) - fack_count++; - skb = skb->next; - } - if(fack_count > tp->fackets_out) - tp->fackets_out = fack_count; -} -#endif - #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ #define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */ @@ -481,13 +454,6 @@ static void tcp_fast_retrans(struct sock *sk, u32 ack, int not_dup) if (ack == tp->snd_una && tp->packets_out && (not_dup == 0)) { /* This is the standard reno style fast retransmit branch. */ -#if 0 /* Not working yet... -DaveM */ - /* If not doing SACK, but doing timestamps, compute timestamp - * based pseudo-SACKs when we see duplicate ACKs. - */ - if(!tp->sack_ok && tp->saw_tstamp) - tcp_compute_tsack(sk, tp); -#endif /* 1. When the third duplicate ack is received, set ssthresh * to one half the current congestion window, but no less * than two segments. Retransmit the missing segment. @@ -611,6 +577,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __u32 ack, while((skb=skb_peek(&sk->write_queue)) && (skb != tp->send_head)) { struct tcp_skb_cb *scb = TCP_SKB_CB(skb); + __u8 sacked = scb->sacked; /* If our packet is before the ack sequence we can * discard it as it's confirmed to have arrived at @@ -626,22 +593,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, __u32 ack, * connection startup slow start one packet too * quickly. This is severely frowned upon behavior. 
*/ + if(sacked & TCPCB_SACKED_RETRANS && tp->retrans_out) + tp->retrans_out--; if(!(scb->flags & TCPCB_FLAG_SYN)) { - __u8 sacked = scb->sacked; - acked |= FLAG_DATA_ACKED; - if(sacked & TCPCB_SACKED_RETRANS) { + if(sacked & TCPCB_SACKED_RETRANS) acked |= FLAG_RETRANS_DATA_ACKED; - - /* XXX The race is, fast retrans frame --> - * XXX retrans timeout sends older frame --> - * XXX ACK arrives for fast retrans frame --> - * XXX retrans_out goes negative --> splat. - * XXX Please help me find a better way -DaveM - */ - if(tp->retrans_out) - tp->retrans_out--; - } if(tp->fackets_out) tp->fackets_out--; } else { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 08ca40a4bea5..328cc9389cf4 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_ipv4.c,v 1.142 1998/04/30 12:00:45 davem Exp $ + * Version: $Id: tcp_ipv4.c,v 1.145 1998/05/02 12:47:13 davem Exp $ * * IPv4 specific functions * @@ -48,7 +48,6 @@ #include #include -#include #include #include #include @@ -61,12 +60,15 @@ #include #include +#include extern int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_sack; extern int sysctl_tcp_syncookies; extern int sysctl_ip_dynaddr; +extern __u32 sysctl_wmem_max; +extern __u32 sysctl_rmem_max; /* Check TCP sequence numbers in ICMP packets. */ #define ICMP_MIN_LENGTH 8 @@ -166,17 +168,21 @@ struct tcp_bind_bucket *tcp_bucket_create(unsigned short snum) return tb; } +#ifdef CONFIG_IP_TRANSPARENT_PROXY /* Ensure that the bound bucket for the port exists. * Return 0 on success. 
*/ static __inline__ int tcp_bucket_check(unsigned short snum) { - if (tcp_bound_hash[tcp_bhashfn(snum)] == NULL && - tcp_bucket_create(snum) == NULL) + struct tcp_bind_bucket *tb = tcp_bound_hash[tcp_bhashfn(snum)]; + for( ; (tb && (tb->port != snum)); tb = tb->next) + ; + if(tb == NULL && tcp_bucket_create(snum) == NULL) return 1; else return 0; } +#endif static int tcp_v4_verify_bind(struct sock *sk, unsigned short snum) { @@ -215,10 +221,21 @@ static int tcp_v4_verify_bind(struct sock *sk, unsigned short snum) result = 1; } } - if((result == 0) && - (tb == NULL) && - (tcp_bucket_create(snum) == NULL)) - result = 1; + if(result == 0) { + if(tb == NULL) { + if(tcp_bucket_create(snum) == NULL) + result = 1; + } else { + /* It could be pending garbage collection, this + * kills the race and prevents it from disappearing + * out from under us by the time we use it. -DaveM + */ + if(tb->owners == NULL && !(tb->flags & TCPB_FLAG_LOCKED)) { + tb->flags = TCPB_FLAG_LOCKED; + tcp_dec_slow_timer(TCP_SLT_BUCKETGC); + } + } + } go_like_smoke: SOCKHASH_UNLOCK(); return result; @@ -1308,6 +1325,11 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (!newsk) goto exit; + if (newsk->rcvbuf < (3 * newsk->mtu)) + newsk->rcvbuf = min ((3 * newsk->mtu), sysctl_rmem_max); + if (newsk->sndbuf < (3 * newsk->mtu)) + newsk->sndbuf = min ((3 * newsk->mtu), sysctl_wmem_max); + sk->tp_pinfo.af_tcp.syn_backlog--; sk->ack_backlog++; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 9a0f9dfbb336..665a448bb50c 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_timer.c,v 1.50 1998/04/14 09:08:59 davem Exp $ + * Version: $Id: tcp_timer.c,v 1.51 1998/05/02 15:19:26 davem Exp $ * * Authors: Ross Biro, * Fred N. 
van Kempen, @@ -448,26 +448,24 @@ void tcp_retransmit_timer(unsigned long data) */ if(tp->sack_ok) { struct sk_buff *skb = skb_peek(&sk->write_queue); - __u8 toclear = TCPCB_SACKED_ACKED; - if(tp->retransmits == 0) - toclear |= TCPCB_SACKED_RETRANS; while((skb != NULL) && (skb != tp->send_head) && (skb != (struct sk_buff *)&sk->write_queue)) { - TCP_SKB_CB(skb)->sacked &= ~(toclear); + TCP_SKB_CB(skb)->sacked &= + ~(TCPCB_SACKED_ACKED | TCPCB_SACKED_RETRANS); skb = skb->next; } - tp->fackets_out = 0; } /* Retransmission. */ tp->retrans_head = NULL; + tp->fackets_out = 0; + tp->retrans_out = 0; if (tp->retransmits == 0) { /* remember window where we lost * "one half of the current window but at least 2 segments" */ - tp->retrans_out = 0; tp->snd_ssthresh = max(tp->snd_cwnd >> (1 + TCP_CWND_SHIFT), 2); tp->snd_cwnd = (1 << TCP_CWND_SHIFT); } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index af29057ecb6f..0b826870f124 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -4,8 +4,9 @@ * * Authors: * Pedro Roque + * Andi Kleen * - * $Id: exthdrs.c,v 1.5 1998/02/12 07:43:39 davem Exp $ + * $Id: exthdrs.c,v 1.6 1998/04/30 16:24:20 freitag Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -34,6 +35,10 @@ #include #include +#include + +#define swap(a,b) do { typeof (a) tmp; tmp = (a); (a) = (b); (b) = (tmp); } while(0) + /* * inbound */ @@ -135,7 +140,7 @@ int ipv6_routing_header(struct sk_buff **skb_ptr, struct device *dev, */ int ipv6opt_bld_rthdr(struct sk_buff *skb, struct ipv6_options *opt, - struct in6_addr *addr, int proto) + struct in6_addr *addr) { struct rt0_hdr *phdr, *ihdr; int hops; @@ -153,8 +158,76 @@ int ipv6opt_bld_rthdr(struct sk_buff *skb, struct ipv6_options *opt, ipv6_addr_copy(phdr->addr + (hops - 1), addr); - phdr->rt_hdr.nexthdr = proto; - + phdr->rt_hdr.nexthdr = proto; return NEXTHDR_ROUTING; } #endif + +/* + * find out if nexthdr is an extension 
header or a protocol + */ + +static __inline__ int ipv6_ext_hdr(u8 nexthdr) +{ + /* + * find out if nexthdr is an extension header or a protocol + */ + return ( (nexthdr == NEXTHDR_HOP) || + (nexthdr == NEXTHDR_ROUTING) || + (nexthdr == NEXTHDR_FRAGMENT) || + (nexthdr == NEXTHDR_ESP) || + (nexthdr == NEXTHDR_AUTH) || + (nexthdr == NEXTHDR_NONE) || + (nexthdr == NEXTHDR_DEST) ); + +} + +/* + * Skip any extension headers. This is used by the ICMP module. + * + * Note that strictly speaking this conflicts with RFC1883 4.0: + * ...The contents and semantics of each extension header determine whether + * or not to proceed to the next header. Therefore, extension headers must + * be processed strictly in the order they appear in the packet; a + * receiver must not, for example, scan through a packet looking for a + * particular kind of extension header and process that header prior to + * processing all preceding ones. + * + * We do exactly this. This is a protocol bug. We can't decide after a + * seeing an unknown discard-with-error flavour TLV option if it's a + * ICMP error message or not (errors should never be send in reply to + * ICMP error messages). + * + * But I see no other way to do this. This might need to be reexamined + * when Linux implements ESP (and maybe AUTH) headers. + */ +struct ipv6_opt_hdr *ipv6_skip_exthdr(struct ipv6_opt_hdr *hdr, + u8 *nexthdrp, int len) +{ + u8 nexthdr = *nexthdrp; + + while (ipv6_ext_hdr(nexthdr)) { + int hdrlen; + + if (nexthdr == NEXTHDR_NONE) + return NULL; + if (len < sizeof(struct ipv6_opt_hdr)) /* be anal today */ + return NULL; + + hdrlen = ipv6_optlen(hdr); + if (len < hdrlen) + return NULL; + + nexthdr = hdr->nexthdr; + hdr = (struct ipv6_opt_hdr *) ((u8*)hdr + hdrlen); + len -= hdrlen; + } + + /* Hack.. Do the same for AUTH headers? 
*/ + if (nexthdr == NEXTHDR_ESP) + return NULL; + + *nexthdrp = nexthdr; + return hdr; +} + diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index f181aec52d67..104895936220 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque * - * $Id: icmp.c,v 1.15 1998/03/21 07:28:03 davem Exp $ + * $Id: icmp.c,v 1.17 1998/05/01 10:31:41 davem Exp $ * * Based on net/ipv4/icmp.c * @@ -21,6 +21,8 @@ * Changes: * * Andi Kleen : exception handling + * Andi Kleen add rate limits. never reply to a icmp. + * add more length checks and other fixes. */ #define __NO_VERSION__ @@ -51,6 +53,7 @@ #include #include #include +#include #include #include @@ -129,6 +132,62 @@ static int icmpv6_getfrag(const void *data, struct in6_addr *saddr, return 0; } + +/* + * Slightly more convenient version of icmpv6_send. + */ +void icmpv6_param_prob(struct sk_buff *skb, int code, void *pos) +{ + int offset = (u8*)pos - (u8*)skb->nh.ipv6h; + + icmpv6_send(skb, ICMPV6_PARAMPROB, code, offset, skb->dev); + kfree_skb(skb); +} + +static inline int is_icmp(struct ipv6hdr *hdr, int len) +{ + __u8 nexthdr = hdr->nexthdr; + + if (!ipv6_skip_exthdr((struct ipv6_opt_hdr *)(hdr+1), &nexthdr, len)) + return 0; + return nexthdr == IPPROTO_ICMP; +} + +int sysctl_icmpv6_time = 1*HZ; + +/* + * Check the ICMP output rate limit + */ +static inline int icmpv6_xrlim_allow(struct sock *sk, int type, + struct flowi *fl) +{ +#if 0 + struct dst_entry *dst; + int allow = 0; +#endif + /* Informational messages are not limited. */ + if (type & 0x80) + return 1; + +#if 0 /* not yet, first fix routing COW */ + + /* + * Look up the output route. + * XXX: perhaps the expire for routing entries cloned by + * this lookup should be more aggressive (not longer than timeout). 
+ */ + dst = ip6_route_output(sk, fl, 1); + if (dst->error) + ipv6_statistics.Ip6OutNoRoutes++; + else + allow = xrlim_allow(dst, sysctl_icmpv6_time); + dst_release(dst); + return allow; +#else + return 1; +#endif +} + /* * an inline helper for the "simple" if statement below * checks if parameter problem report is caused by an @@ -214,6 +273,24 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, return; } + /* + * Never answer to a ICMP packet. + */ + if (is_icmp(hdr, (u8*)skb->tail - (u8*)hdr)) { + printk(KERN_DEBUG "icmpv6_send: no reply to icmp\n"); + return; + } + + fl.proto = IPPROTO_ICMPV6; + fl.nl_u.ip6_u.daddr = &hdr->saddr; + fl.nl_u.ip6_u.saddr = saddr; + fl.oif = iif; + fl.uli_u.icmpt.type = type; + fl.uli_u.icmpt.code = code; + + if (!icmpv6_xrlim_allow(sk, type, &fl)) + return; + /* * ok. kick it. checksum will be provided by the * getfrag_t callback. @@ -248,13 +325,6 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, msg.len = len; - fl.proto = IPPROTO_ICMPV6; - fl.nl_u.ip6_u.daddr = &hdr->saddr; - fl.nl_u.ip6_u.saddr = saddr; - fl.oif = iif; - fl.uli_u.icmpt.type = type; - fl.uli_u.icmpt.code = code; - ip6_build_xmit(sk, icmpv6_getfrag, &msg, &fl, len, NULL, -1, MSG_DONTWAIT); @@ -312,21 +382,6 @@ static void icmpv6_echo_reply(struct sk_buff *skb) dst_release(xchg(&sk->dst_cache, NULL)); } -static __inline__ int ipv6_ext_hdr(u8 nexthdr) -{ - /* - * find out if nexthdr is an extension header or a protocol - */ - return ( (nexthdr == NEXTHDR_HOP) || - (nexthdr == NEXTHDR_ROUTING) || - (nexthdr == NEXTHDR_FRAGMENT) || - (nexthdr == NEXTHDR_ESP) || - (nexthdr == NEXTHDR_AUTH) || - (nexthdr == NEXTHDR_NONE) || - (nexthdr == NEXTHDR_DEST) ); - -} - static void icmpv6_notify(struct sk_buff *skb, int type, int code, unsigned char *buff, int len, struct in6_addr *saddr, struct in6_addr *daddr, @@ -335,39 +390,22 @@ static void icmpv6_notify(struct sk_buff *skb, struct ipv6hdr *hdr = (struct ipv6hdr *) buff; struct 
inet6_protocol *ipprot; struct sock *sk; - char * pbuff; + struct ipv6_opt_hdr *pb; __u32 info = 0; int hash; u8 nexthdr; - /* now skip over extension headers */ - nexthdr = hdr->nexthdr; - pbuff = (char *) (hdr + 1); + pb = (struct ipv6_opt_hdr *) (hdr + 1); len -= sizeof(struct ipv6hdr); + if (len < 0) + return; - while (ipv6_ext_hdr(nexthdr)) { - int hdrlen; - - if (nexthdr == NEXTHDR_NONE) - return; - - nexthdr = *pbuff; - - /* Header length is size in 8-octet units, not - * including the first 8 octets. - */ - hdrlen = *(pbuff+1); - hdrlen = (hdrlen + 1) << 3; - - if (hdrlen > len) - return; - - /* Now this is right. */ - pbuff += hdrlen; - len -= hdrlen; - } + /* now skip over extension headers */ + pb = ipv6_skip_exthdr(pb, &nexthdr, len); + if (!pb) + return; hash = nexthdr & (MAX_INET_PROTOS - 1); @@ -378,7 +416,7 @@ static void icmpv6_notify(struct sk_buff *skb, continue; if (ipprot->err_handler) - ipprot->err_handler(skb, type, code, pbuff, info, + ipprot->err_handler(skb, type, code, (u8*)pb, info, saddr, daddr, ipprot); return; } @@ -391,7 +429,7 @@ static void icmpv6_notify(struct sk_buff *skb, return; while((sk = raw_v6_lookup(sk, nexthdr, daddr, saddr))) { - rawv6_err(sk, type, code, pbuff, saddr, daddr); + rawv6_err(sk, type, code, (char*)pb, saddr, daddr); sk = sk->next; } } @@ -514,7 +552,7 @@ discard_it: return 0; } -__initfunc(void icmpv6_init(struct net_proto_family *ops)) +__initfunc(int icmpv6_init(struct net_proto_family *ops)) { struct sock *sk; int err; @@ -528,11 +566,11 @@ __initfunc(void icmpv6_init(struct net_proto_family *ops)) icmpv6_socket->state = SS_UNCONNECTED; icmpv6_socket->type=SOCK_RAW; - if((err=ops->create(icmpv6_socket, IPPROTO_ICMPV6))<0) + if((err=ops->create(icmpv6_socket, IPPROTO_ICMPV6))<0) { printk(KERN_DEBUG "Failed to create the ICMP6 control socket.\n"); - - MOD_DEC_USE_COUNT; + return 1; + } sk = icmpv6_socket->sk; sk->allocation = GFP_ATOMIC; @@ -542,6 +580,16 @@ __initfunc(void icmpv6_init(struct 
net_proto_family *ops)) ndisc_init(ops); igmp6_init(ops); + return 0; +} + +void icmpv6_cleanup(void) +{ + inet6_del_protocol(&icmpv6_protocol); +#if 0 + ndisc_cleanup(); +#endif + igmp6_cleanup(); } static struct icmp6_err { diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 71ad7e1a0977..5f024dddbd70 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -6,7 +6,7 @@ * Pedro Roque * Ian P. Morris * - * $Id: ip6_input.c,v 1.8 1998/02/12 07:43:43 davem Exp $ + * $Id: ip6_input.c,v 1.9 1998/04/30 16:24:24 freitag Exp $ * * Based in linux/net/ipv4/ip_input.c * @@ -70,12 +70,6 @@ struct ipv6_tlvtype { u8 len; }; -struct ipv6_destopt_hdr { - u8 nexthdr; - u8 hdrlen; -}; - - struct tlvtype_proc { u8 type; int (*func) (struct sk_buff *, struct device *dev, __u8 *ptr, diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 9bb2d4d3c654..ebd3365cd431 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -7,7 +7,7 @@ * * Based on linux/net/ipv4/ip_sockglue.c * - * $Id: ipv6_sockglue.c,v 1.18 1998/03/20 09:12:18 davem Exp $ + * $Id: ipv6_sockglue.c,v 1.19 1998/04/30 16:24:26 freitag Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -242,7 +242,7 @@ void ipv6_cleanup(void) ipv6_sysctl_unregister(); #endif ip6_route_cleanup(); - ndisc_cleanup(); + icmpv6_cleanup(); addrconf_cleanup(); } #endif diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 407698eb11b3..0e10dcf0b9ab 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque * - * $Id: mcast.c,v 1.14 1998/03/20 09:12:18 davem Exp $ + * $Id: mcast.c,v 1.15 1998/04/30 16:24:28 freitag Exp $ * * Based on linux/ipv4/igmp.c and linux/ipv4/ip_sockglue.c * @@ -619,8 +619,6 @@ __initfunc(void igmp6_init(struct net_proto_family *ops)) printk(KERN_DEBUG "Failed to create the IGMP6 control socket.\n"); - MOD_DEC_USE_COUNT; - sk = igmp6_socket->sk; 
sk->allocation = GFP_ATOMIC; sk->num = 256; /* Don't receive any data */ @@ -632,3 +630,9 @@ __initfunc(void igmp6_init(struct net_proto_family *ops)) #endif } +void igmp6_cleanup(void) +{ +#ifdef CONFIG_PROC_FS + remove_proc_entry("net/igmp6", 0); +#endif +} diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 2e437f2ded8d..e69d90332d64 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1152,9 +1152,6 @@ __initfunc(void ndisc_init(struct net_proto_family *ops)) printk(KERN_DEBUG "Failed to create the NDISC control socket.\n"); - /* Eeeh... What is it? --ANK */ - MOD_DEC_USE_COUNT; - sk = ndisc_socket->sk; sk->allocation = GFP_ATOMIC; sk->net_pinfo.af_inet6.hop_limit = 255; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 55fecc6766ac..e78cf97a23f9 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque * - * $Id: reassembly.c,v 1.9 1998/02/12 07:43:48 davem Exp $ + * $Id: reassembly.c,v 1.10 1998/04/30 16:24:32 freitag Exp $ * * Based on: net/ipv4/ip_fragment.c * @@ -15,6 +15,11 @@ * 2 of the License, or (at your option) any later version. */ +/* + * Fixes: + * Andi Kleen Make it work with multiple hosts. + * More RFC compliance. + */ #include #include #include @@ -39,8 +44,9 @@ static struct frag_queue ipv6_frag_queue = { &ipv6_frag_queue, &ipv6_frag_queue, - 0, {0}, NULL, NULL, - 0 + 0, {{{0}}}, {{{0}}}, + {0}, NULL, NULL, + 0, 0, NULL }; static void create_frag_entry(struct sk_buff *skb, @@ -72,12 +78,11 @@ static int reasm_frag(struct frag_queue *fq, struct sk_buff **skb, * one it's the kmalloc for a struct ipv6_frag. * Feel free to try other alternatives... 
*/ - reasm_queue(fq, *skb, fhdr); - if ((fhdr->frag_off & __constant_htons(0x0001)) == 0) { fq->last_in = 1; fq->nhptr = nhptr; } + reasm_queue(fq, *skb, fhdr); if (fq->last_in) { if ((nh = reasm_frag_1(fq, skb))) @@ -90,18 +95,27 @@ static int reasm_frag(struct frag_queue *fq, struct sk_buff **skb, return 0; } -int ipv6_reassembly(struct sk_buff **skb, struct device *dev, __u8 *nhptr, +int ipv6_reassembly(struct sk_buff **skbp, struct device *dev, __u8 *nhptr, struct ipv6_options *opt) { - struct frag_hdr *fhdr = (struct frag_hdr *) ((*skb)->h.raw); + struct sk_buff *skb = *skbp; + struct frag_hdr *fhdr = (struct frag_hdr *) (skb->h.raw); struct frag_queue *fq; - + struct ipv6hdr *hdr; + + if ((u8 *)(fhdr+1) > skb->tail) { + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw); + return 0; + } + hdr = skb->nh.ipv6h; for (fq = ipv6_frag_queue.next; fq != &ipv6_frag_queue; fq = fq->next) { - if (fq->id == fhdr->identification) - return reasm_frag(fq, skb, nhptr,fhdr); + if (fq->id == fhdr->identification && + !ipv6_addr_cmp(&hdr->saddr, &fq->saddr) && + !ipv6_addr_cmp(&hdr->daddr, &fq->daddr)) + return reasm_frag(fq, skbp, nhptr,fhdr); } - create_frag_entry(*skb, dev, nhptr, fhdr); + create_frag_entry(skb, dev, nhptr, fhdr); return 0; } @@ -154,6 +168,7 @@ static void create_frag_entry(struct sk_buff *skb, struct device *dev, struct frag_hdr *fhdr) { struct frag_queue *fq; + struct ipv6hdr *hdr; fq = (struct frag_queue *) kmalloc(sizeof(struct frag_queue), GFP_ATOMIC); @@ -167,6 +182,10 @@ static void create_frag_entry(struct sk_buff *skb, struct device *dev, fq->id = fhdr->identification; + hdr = skb->nh.ipv6h; + ipv6_addr_copy(&fq->saddr, &hdr->saddr); + ipv6_addr_copy(&fq->daddr, &hdr->daddr); + fq->dev = dev; /* init_timer has been done by the memset */ @@ -193,14 +212,14 @@ static void create_frag_entry(struct sk_buff *skb, struct device *dev, static void reasm_queue(struct frag_queue *fq, struct sk_buff *skb, - struct frag_hdr *fhdr) + struct frag_hdr *fhdr) { 
struct ipv6_frag *nfp, *fp, **bptr; nfp = (struct ipv6_frag *) kmalloc(sizeof(struct ipv6_frag), GFP_ATOMIC); - if (nfp == NULL) { + if (nfp == NULL) { kfree_skb(skb); return; } @@ -209,6 +228,10 @@ static void reasm_queue(struct frag_queue *fq, struct sk_buff *skb, nfp->len = (ntohs(skb->nh.ipv6h->payload_len) - ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1))); + if ((u32)nfp->offset + (u32)nfp->len > 65536) { + icmpv6_param_prob(skb,ICMPV6_HDR_FIELD, (u8*)&fhdr->frag_off); + goto err; + } nfp->skb = skb; nfp->fhdr = fhdr; @@ -224,19 +247,37 @@ static void reasm_queue(struct frag_queue *fq, struct sk_buff *skb, } if (fp && fp->offset == nfp->offset) { - if (fp->len != nfp->len) { - /* this cannot happen */ + if (nfp->len != fp->len) { printk(KERN_DEBUG "reasm_queue: dup with wrong len\n"); } /* duplicate. discard it. */ - kfree_skb(skb); - kfree(nfp); - return; + goto err; } *bptr = nfp; nfp->next = fp; + +#ifdef STRICT_RFC + if (fhdr->frag_off & __constant_htons(0x0001)) { + /* Check if the fragment is rounded to 8 bytes. + * Required by the RFC. + */ + if (nfp->len & 0x7) { + printk(KERN_DEBUG "fragment not rounded to 8bytes\n"); + + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, + &skb->nh.ipv6h->payload_len); + goto err; + } + } +#endif + + return; + +err: + kfree(nfp); + kfree_skb(skb); } /* @@ -303,6 +344,8 @@ static int reasm_frag_1(struct frag_queue *fq, struct sk_buff **skb_in) /* * FIXME: If we don't have a checksum we ought to be able * to defragment and checksum in this pass. [AC] + * Note that we don't really know yet whether the protocol + * needs checksums at all. It might still be a good idea. 
-AK */ for(fp = fq->fragments; fp; ) { struct ipv6_frag *back; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 721677fa6350..0a4a95c7c22c 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque * - * $Id: tcp_ipv6.c,v 1.78 1998/04/16 16:29:22 freitag Exp $ + * $Id: tcp_ipv6.c,v 1.80 1998/05/02 12:47:15 davem Exp $ * * Based on: * linux/net/ipv4/tcp.c @@ -123,10 +123,21 @@ static int tcp_v6_verify_bind(struct sock *sk, unsigned short snum) result = 1; } } - if((result == 0) && - (tb == NULL) && - (tcp_bucket_create(snum) == NULL)) - result = 1; + if(result == 0) { + if(tb == NULL) { + if(tcp_bucket_create(snum) == NULL) + result = 1; + } else { + /* It could be pending garbage collection, this + * kills the race and prevents it from disappearing + * out from under us by the time we use it. -DaveM + */ + if(tb->owners == NULL && !(tb->flags & TCPB_FLAG_LOCKED)) { + tb->flags = TCPB_FLAG_LOCKED; + tcp_dec_slow_timer(TCP_SLT_BUCKETGC); + } + } + } go_like_smoke: SOCKHASH_UNLOCK(); return result; @@ -731,7 +742,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, isn = tcp_v6_init_sequence(sk,skb); /* - * There are no SYN attacks on IPv6, yet... + * There are no SYN attacks on IPv6, yet... */ if (BACKLOG(sk) >= BACKLOGMAX(sk)) { printk(KERN_DEBUG "droping syn ack:%d max:%d\n", diff --git a/net/netsyms.c b/net/netsyms.c index 5d380fbb6c07..52be530335a9 100644 --- a/net/netsyms.c +++ b/net/netsyms.c @@ -458,7 +458,7 @@ EXPORT_SYMBOL(qdisc_put_rtab); EXPORT_SYMBOL(qdisc_new_estimator); EXPORT_SYMBOL(qdisc_kill_estimator); #endif -#ifdef CONFIG_NET_POLICE +#ifdef CONFIG_NET_CLS_POLICE EXPORT_SYMBOL(tcf_police); EXPORT_SYMBOL(tcf_police_locate); EXPORT_SYMBOL(tcf_police_destroy); -- 2.39.5