aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorkuznet <kuznet>1997-10-29 20:26:58 +0000
committerkuznet <kuznet>1997-10-29 20:26:58 +0000
commite5afd356a411a932cdd9f89b3db6d7874aa41cd5 (patch)
tree27b56b9e6e8077fc2b2323b85c278cbd63b6dc47
parent4dca5013d37ab0123f6f3c846a495d5b33a5db43 (diff)
downloadnetdev-vger-cvs-e5afd356a411a.tar.gz
Package of tunnels over IP --- IPIP, GRE and SIT.
These three drivers are clones of net/ipv4/ip_gre.c, so look for comments there. ipip.c, sit.c and ip_gre.c contain a lot of common code, but I did not find a simple way to merge them together. Fixes to other files are one-liners to teach ARP to understand the new ARPHRD_* types. Also, IPv6 had hardwired dependencies on ethernet. New tunnels (except for sit and dvmrp) can be created by the iproute2 utility. Look at CONFIG_NET_IPGRE_BROADCAST, it is a nice toy :-) Yes, they are modular, but I have not tested that. I am sure several necessary symbols are not exported properly. Please check it, I do not use modules myself.
-rw-r--r--Documentation/Configure.help17
-rw-r--r--include/linux/if_arp.h2
-rw-r--r--include/linux/if_tunnel.h22
-rw-r--r--include/linux/igmp.h2
-rw-r--r--include/linux/mroute.h10
-rw-r--r--include/net/ipip.h29
-rw-r--r--include/net/sit.h41
-rw-r--r--net/ipv4/Config.in4
-rw-r--r--net/ipv4/Makefile8
-rw-r--r--net/ipv4/af_inet.c13
-rw-r--r--net/ipv4/arp.c3
-rw-r--r--net/ipv4/igmp.c6
-rw-r--r--net/ipv4/ip_gre.c1182
-rw-r--r--net/ipv4/ipip.c832
-rw-r--r--net/ipv4/ipmr.c263
-rw-r--r--net/ipv4/protocol.c20
-rw-r--r--net/ipv6/addrconf.c38
-rw-r--r--net/ipv6/af_inet6.c4
-rw-r--r--net/ipv6/ipv6_sockglue.c3
-rw-r--r--net/ipv6/mcast.c7
-rw-r--r--net/ipv6/ndisc.c10
-rw-r--r--net/ipv6/sit.c943
22 files changed, 2656 insertions, 803 deletions
diff --git a/Documentation/Configure.help b/Documentation/Configure.help
index d45c26d68..e33bb3187 100644
--- a/Documentation/Configure.help
+++ b/Documentation/Configure.help
@@ -1156,7 +1156,22 @@ CONFIG_NET_IPIP
one encapsulator called tunnel.o and one decapsulator called
ipip.o. You can read details in drivers/net/README.tunnel. Most
people won't need this and can say N.
-
+
+IP: GRE tunnels over IP
+CONFIG_NET_IPGRE
+ Another kind of tunneling protocol - "Generic Routing Encapsulation".
+ It allows tunneling any networking protocol over existing IPv4
+ infrastructure. At the moment only IPv4 and IPv6 are supported.
+ It is useful if the other endpoint is a Cisco router: it likes
+ GRE much more than IPIP and, in particular, allows multicast
+ redistribution over GRE tunnels.
+
+IP: broadcast GRE over IP
+CONFIG_NET_IPGRE_BROADCAST
+ One application of GRE/IP: it allows constructing a broadcast LAN
+ that looks like an Ethernet network but is distributed over the
+ Internet. It requires that your domain supports multicast routing.
+
IP: firewall packet logging
CONFIG_IP_FIREWALL_VERBOSE
This gives you information about what your firewall did with
diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h
index c93764cb7..97e6a0047 100644
--- a/include/linux/if_arp.h
+++ b/include/linux/if_arp.h
@@ -60,6 +60,8 @@
#define ARPHRD_BIF 775 /* AP1000 BIF */
#define ARPHRD_SIT 776 /* sit0 device - IPv6-in-IPv4 */
#define ARPHRD_IPDDP 777 /* IP over DDP tunneller */
+#define ARPHRD_IPGRE 778 /* GRE over IP */
+#define ARPHRD_PIMREG 779 /* PIMSM register interface */
/* ARP protocol opcodes. */
#define ARPOP_REQUEST 1 /* ARP request */
diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h
index b5075a419..bef9f8fd9 100644
--- a/include/linux/if_tunnel.h
+++ b/include/linux/if_tunnel.h
@@ -1,12 +1,28 @@
#ifndef _IF_TUNNEL_H_
#define _IF_TUNNEL_H_
-#define SIOCGTUNNEL (SIOCDEVPRIVATE + 0)
-#define SIOCSTUNNEL (SIOCDEVPRIVATE + 1)
+#define SIOCGETTUNNEL (SIOCDEVPRIVATE + 0)
+#define SIOCADDTUNNEL (SIOCDEVPRIVATE + 1)
+#define SIOCDELTUNNEL (SIOCDEVPRIVATE + 2)
+#define SIOCCHGTUNNEL (SIOCDEVPRIVATE + 3)
-struct ipip_parms
+#define GRE_CSUM __constant_htons(0x8000)
+#define GRE_ROUTING __constant_htons(0x4000)
+#define GRE_KEY __constant_htons(0x2000)
+#define GRE_SEQ __constant_htons(0x1000)
+#define GRE_STRICT __constant_htons(0x0800)
+#define GRE_REC __constant_htons(0x0700)
+#define GRE_FLAGS __constant_htons(0x00F8)
+#define GRE_VERSION __constant_htons(0x0007)
+
+struct ip_tunnel_parm
{
+ char name[IFNAMSIZ];
int link;
+ __u16 i_flags;
+ __u16 o_flags;
+ __u32 i_key;
+ __u32 o_key;
struct iphdr iph;
};
diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 92fcde6f4..8be2d1b87 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -122,5 +122,7 @@ extern void ip_mc_init_dev(struct in_device *);
extern void ip_mc_destroy_dev(struct in_device *);
extern void ip_mc_up(struct in_device *);
extern void ip_mc_down(struct in_device *);
+extern int ip_mc_dec_group(struct in_device *in_dev, u32 addr);
+extern void ip_mc_inc_group(struct in_device *in_dev, u32 addr);
#endif
#endif
diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index 517d52ac0..55193867d 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -113,7 +113,7 @@ struct sioc_vif_req
struct igmpmsg
{
- unsigned long unused1,unused2;
+ __u32 unused1,unused2;
unsigned char im_msgtype; /* What is this */
unsigned char im_mbz; /* Must be zero */
unsigned char im_vif; /* Interface (this ought to be a vifi_t!) */
@@ -144,7 +144,7 @@ struct vif_device
unsigned long rate_limit; /* Traffic shaping (NI) */
unsigned char threshold; /* TTL threshold */
unsigned short flags; /* Control flags */
- unsigned long local,remote; /* Addresses(remote for tunnels)*/
+ __u32 local,remote; /* Addresses(remote for tunnels)*/
int link; /* Physical interface index */
};
@@ -158,7 +158,7 @@ struct mfc_cache
int mfc_flags; /* Flags on line */
struct sk_buff_head mfc_unresolved; /* Unresolved buffers */
int mfc_queuelen; /* Unresolved buffer counter */
- unsigned mfc_last_assert;
+ unsigned long mfc_last_assert;
int mfc_minvif;
int mfc_maxvif;
unsigned long mfc_bytes;
@@ -203,12 +203,12 @@ struct mfc_cache
#define PIM_NULL_REGISTER __constant_htonl(0x40000000)
-/* PIMv2 register message header layout */
+/* PIMv2 register message header layout (ietf-draft-idmr-pimvsm-v2-00.ps) */
struct pimreghdr
{
__u8 type;
- __u8 addrlen;
+ __u8 reserved;
__u16 csum;
__u32 flags;
};
diff --git a/include/net/ipip.h b/include/net/ipip.h
index bddbe13b3..22c464c3c 100644
--- a/include/net/ipip.h
+++ b/include/net/ipip.h
@@ -3,18 +3,31 @@
#include <linux/if_tunnel.h>
-extern void ipip_err(struct sk_buff *skb, unsigned char*, int);
-extern int ipip_rcv(struct sk_buff *skb, unsigned short len);
+/* Keep error state on tunnel for 30 sec */
+#define IPTUNNEL_ERR_TIMEO (30*HZ)
-struct ipip_tunnel
+struct ip_tunnel
{
- struct device *next;
- struct device *next_active;
- struct ipip_parms parms;
+ struct ip_tunnel *next;
+ struct device *dev;
struct net_device_stats stat;
- char name[0];
+
+ int recursion; /* Depth of hard_start_xmit recursion */
+ int err_count; /* Number of arrived ICMP errors */
+ unsigned long err_time; /* Time when the last ICMP error arrived */
+
+ /* These four fields used only by GRE */
+ __u32 i_seqno; /* The last seen seqno */
+ __u32 o_seqno; /* The last output seqno */
+ int hlen; /* Precalculated GRE header length */
+ int mlink;
+
+ struct ip_tunnel_parm parms;
};
-extern int ipip_tunnel_init(struct device *dev);
+extern int ipip_init(void);
+extern int ipgre_init(void);
+extern int sit_init(void);
+extern void sit_cleanup(void);
#endif
diff --git a/include/net/sit.h b/include/net/sit.h
deleted file mode 100644
index 64ba14b77..000000000
--- a/include/net/sit.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * SIT tunneling device - definitions
- * Linux INET6 implementation
- *
- * Authors:
- * Pedro Roque <roque@di.fc.ul.pt>
- *
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef _NET_SIT_H
-#define _NET_SIT_H
-
-struct sit_mtu_info {
- __u32 addr; /* IPv4 destination */
- unsigned long tstamp; /* last use tstamp */
- __u32 mtu; /* Path MTU */
- struct sit_mtu_info *next;
-};
-
-struct sit_vif {
- char name[8];
- struct device *dev;
- struct sit_vif *next;
-};
-
-#define SIT_PEER(dev) (*(u32*)(dev)->dev_addr)
-
-extern int sit_init(void);
-extern void sit_cleanup(void);
-
-extern struct device * sit_add_tunnel(__u32 dstaddr);
-
-#define SIT_GC_TIMEOUT (3*60*HZ)
-#define SIT_GC_FREQUENCY (2*60*HZ)
-
-#endif
diff --git a/net/ipv4/Config.in b/net/ipv4/Config.in
index aa6977aa4..ea50576ab 100644
--- a/net/ipv4/Config.in
+++ b/net/ipv4/Config.in
@@ -39,7 +39,11 @@ if [ "$CONFIG_IP_MASQUERADE" != "n" ]; then
fi
bool 'IP: optimize as router not host' CONFIG_IP_ROUTER
tristate 'IP: tunneling' CONFIG_NET_IPIP
+tristate 'IP: GRE tunnels over IP' CONFIG_NET_IPGRE
if [ "$CONFIG_IP_MULTICAST" = "y" ]; then
+ if [ "$CONFIG_NET_IPGRE" != "n" ]; then
+ bool 'IP: broadcast GRE over IP' CONFIG_NET_IPGRE_BROADCAST
+ fi
bool 'IP: multicast routing' CONFIG_IP_MROUTE
if [ "$CONFIG_IP_MROUTE" = "y" ]; then
bool 'IP: PIM-SM version 1 support' CONFIG_IP_PIMSM_V1
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 91cba0500..e42be4c42 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -47,6 +47,14 @@ else
endif
endif
+ifeq ($(CONFIG_NET_IPGRE),y)
+IPV4X_OBJS += ip_gre.o
+else
+ ifeq ($(CONFIG_NET_IPGRE),m)
+ MX_OBJS += ip_gre.o
+ endif
+endif
+
ifeq ($(CONFIG_IP_MASQUERADE),y)
IPV4X_OBJS += ip_masq.o ip_masq_app.o
M_OBJS += ip_masq_ftp.o ip_masq_irc.o ip_masq_raudio.o ip_masq_quake.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 7412eba65..e303e980b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -5,7 +5,7 @@
*
* AF_INET protocol family socket handler.
*
- * Version: $Id: af_inet.c,v 1.57 1997-10-19 13:37:14 freitag Exp $
+ * Version: $Id: af_inet.c,v 1.58 1997-10-29 20:27:21 kuznet Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -95,6 +95,7 @@
#include <net/sock.h>
#include <net/raw.h>
#include <net/icmp.h>
+#include <net/ipip.h>
#include <net/inet_common.h>
#include <linux/ip_fw.h>
#ifdef CONFIG_IP_MROUTE
@@ -1118,6 +1119,16 @@ __initfunc(void inet_proto_init(struct net_proto *pro))
icmp_init(&inet_family_ops);
+ /* I wish inet_add_protocol had no constructor hook...
+ I had to move IPIP from net/ipv4/protocol.c :-( --ANK
+ */
+#ifdef CONFIG_NET_IPIP
+ ipip_init();
+#endif
+#ifdef CONFIG_NET_IPGRE
+ ipgre_init();
+#endif
+
/*
* Set the firewalling up
*/
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 13ad2fbd1..e4aa6c306 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1,6 +1,6 @@
/* linux/net/inet/arp.c
*
- * Version: $Id: arp.c,v 1.52 1997-10-11 02:53:27 davem Exp $
+ * Version: $Id: arp.c,v 1.53 1997-10-29 20:27:27 kuznet Exp $
*
* Copyright (C) 1994 by Florian La Roche
*
@@ -1391,6 +1391,7 @@ int arp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
case ARPHRD_METRICOM:
case ARPHRD_IEEE802:
case ARPHRD_FDDI:
+ case ARPHRD_IPGRE:
if(arp->ar_pro != htons(ETH_P_IP))
goto out;
break;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 7b2aa3b76..723a8842b 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -8,7 +8,7 @@
* the older version didn't come out right using gcc 2.5.8, the newer one
* seems to fall out with gcc 2.6.2.
*
- * Version: $Id: igmp.c,v 1.21 1997-10-10 22:40:57 davem Exp $
+ * Version: $Id: igmp.c,v 1.22 1997-10-29 20:27:24 kuznet Exp $
*
* Authors:
* Alan Cox <Alan.Cox@linux.org>
@@ -432,7 +432,7 @@ static void igmp_group_added(struct ip_mc_list *im)
* A socket has joined a multicast group on device dev.
*/
-static void ip_mc_inc_group(struct in_device *in_dev, u32 addr)
+void ip_mc_inc_group(struct in_device *in_dev, u32 addr)
{
struct ip_mc_list *i, *im;
@@ -472,7 +472,7 @@ static void ip_mc_inc_group(struct in_device *in_dev, u32 addr)
* A socket has left a multicast group on device dev
*/
-static int ip_mc_dec_group(struct in_device *in_dev, u32 addr)
+int ip_mc_dec_group(struct in_device *in_dev, u32 addr)
{
struct ip_mc_list *i, **ip;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
new file mode 100644
index 000000000..96bb70cf2
--- /dev/null
+++ b/net/ipv4/ip_gre.c
@@ -0,0 +1,1182 @@
+/*
+ * Linux NET3: GRE over IP protocol decoder.
+ *
+ * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <asm/uaccess.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/if_arp.h>
+#include <linux/mroute.h>
+#include <linux/init.h>
+#include <linux/in6.h>
+#include <linux/inetdevice.h>
+#include <linux/igmp.h>
+
+#include <net/sock.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
+#include <net/icmp.h>
+#include <net/protocol.h>
+#include <net/ipip.h>
+#include <net/arp.h>
+#include <net/checksum.h>
+
+/*
+ Problems & solutions
+ --------------------
+
+ 1. The most important issue is detecting local dead loops.
+ They would cause complete host lockup in transmit, which
+ would be "resolved" by stack overflow or, if queueing is enabled,
+ with infinite looping in net_bh.
+
+ We cannot track such dead loops during route installation,
+ it is infeasible task. The most general solutions would be
+ to keep skb->encapsulation counter (sort of local ttl),
+ and silently drop packet when it expires. It is the best
+ solution, but it supposes maintaining a new variable in ALL
+ skb, even if no tunneling is used.
+
+ Current solution: t->recursion lock breaks dead loops. It looks
+ like dev->tbusy flag, but I preferred new variable, because
+ the semantics is different. One day, when hard_start_xmit
+ will be multithreaded we will have to use skb->encapsulation.
+
+
+
+ 2. Networking dead loops would not kill routers, but would really
+ kill network. IP hop limit plays role of "t->recursion" in this case,
+ if we copy it from packet being encapsulated to upper header.
+ It is very good solution, but it introduces two problems:
+
+ - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
+ do not work over tunnels.
+ - traceroute does not work. I planned to relay ICMP from tunnel,
+ so that this problem would be solved and traceroute output
+ would even more informative. This idea appeared to be wrong:
+ only Linux complies to rfc1812 now (yes, guys, Linux is the only
+ true router now :-)), all routers (at least, in neighbourhood of mine)
+ return only 8 bytes of payload. It is the end.
+
+ Hence, if we want that OSPF worked or traceroute said something reasonable,
+ we should search for another solution.
+
+ One of them is to parse packet trying to detect inner encapsulation
+ made by our node. It is difficult or even impossible, especially,
+ taking into account fragmentation. To be short, it is not a solution at all.
+
+ Current solution: The solution was UNEXPECTEDLY SIMPLE.
+ We force DF flag on tunnels with preconfigured hop limit,
+ that is ALL. :-) Well, it does not remove the problem completely,
+ but exponential growth of network traffic is changed to linear
+ (branches, that exceed pmtu are pruned) and tunnel mtu
+ fastly degrades to value <68, where looping stops.
+ Yes, it is not good if there exists a router in the loop,
+ which does not force DF, even when encapsulating packets have DF set.
+ But it is not our problem! Nobody could accuse us, we made
+ all that we could make. Even if it is your gated who injected
+ fatal route to network, even if it were you who configured
+ fatal static route: you are innocent. :-)
+
+
+
+ 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
+ practically identical code. It would be good to glue them
+ together, but it is not very evident, how to make them modular.
+ sit is integral part of IPv6, ipip and gre are naturally modular.
+ We could extract common parts (hash table, ioctl etc)
+ to a separate module (ip_tunnel.c).
+
+ Alexey Kuznetsov.
+ */
+
+static int ipgre_tunnel_init(struct device *dev);
+
+/* Fallback tunnel: no source, no destination, no key, no options */
+
+static int ipgre_fb_tunnel_init(struct device *dev);
+
+/* Device object for the fallback tunnel. The initializer is positional;
+ * its last explicit entry is ipgre_fb_tunnel_init.
+ * NOTE(review): presumably that slot is the device init hook; struct device
+ * is not visible here, confirm against its declaration.
+ */
+static struct device ipgre_fb_tunnel_dev = {
+ NULL, 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, ipgre_fb_tunnel_init,
+};
+
+/* Tunnel state for the fallback device. Positional initializer following
+ * struct ip_tunnel: next, dev, stat, recursion, err_count, err_time,
+ * i_seqno, o_seqno, hlen, mlink, parms (only parms.name is set: "gre0").
+ */
+static struct ip_tunnel ipgre_fb_tunnel = {
+ NULL, &ipgre_fb_tunnel_dev, {0, }, 0, 0, 0, 0, 0, 0, 0, {"gre0", }
+};
+
+/* Tunnel hash table */
+
+/*
+ 4 hash tables:
+
+ 3: (remote,local)
+ 2: (remote,*)
+ 1: (*,local)
+ 0: (*,*)
+
+ We require exact key match i.e. if a key is present in packet
+ it will match only tunnel with the same key; if it is not present,
+ it will match only keyless tunnel.
+
+ All keyless packets that do not match a configured keyless tunnel
+ will match the fallback tunnel.
+ */
+
+/* Number of buckets per hash table. Must remain a power of two because
+ * HASH() reduces its result with a bit mask.
+ */
+#define HASH_SIZE 16
+/* Fold the two low nibbles of addr into a bucket index in [0, HASH_SIZE).
+ * The argument is fully parenthesized so that expression arguments such as
+ * HASH(a|b) are hashed as a whole instead of being split by precedence.
+ */
+#define HASH(addr) ((((addr))^((addr)>>4))&(HASH_SIZE-1))
+
+/* tunnels[prio][bucket]: chain heads, linked through ip_tunnel.next. */
+static struct ip_tunnel *tunnels[4][HASH_SIZE];
+
+/* Named views of the four tables, most specific first. */
+#define tunnels_r_l (tunnels[3])
+#define tunnels_r (tunnels[2])
+#define tunnels_l (tunnels[1])
+#define tunnels_wc (tunnels[0])
+
+/* Given src, dst and key, find the appropriate tunnel for input. */
+
+/*
+ * Search order, most specific table first:
+ *   (remote,local) -> (remote,*) -> (*,local) -> (*,*) -> fallback device.
+ * A candidate is accepted only if its i_key equals key and its device is
+ * IFF_UP. Returns NULL when nothing matches and the fallback is down.
+ */
+static struct ip_tunnel * ipgre_tunnel_lookup(u32 remote, u32 local, u32 key)
+{
+	unsigned key_hash = HASH(key);
+	unsigned pair_hash = HASH(remote) ^ key_hash;
+	struct ip_tunnel *cand;
+
+	/* Both endpoints configured. */
+	for (cand = tunnels_r_l[pair_hash]; cand != NULL; cand = cand->next) {
+		if (local != cand->parms.iph.saddr || remote != cand->parms.iph.daddr)
+			continue;
+		if (cand->parms.i_key == key && (cand->dev->flags&IFF_UP))
+			return cand;
+	}
+
+	/* Only the remote endpoint configured. */
+	for (cand = tunnels_r[pair_hash]; cand != NULL; cand = cand->next) {
+		if (remote != cand->parms.iph.daddr)
+			continue;
+		if (cand->parms.i_key == key && (cand->dev->flags&IFF_UP))
+			return cand;
+	}
+
+	/* Only the local endpoint configured; a multicast destination that
+	   equals the configured daddr also counts as "local". */
+	for (cand = tunnels_l[key_hash]; cand != NULL; cand = cand->next) {
+		if (local != cand->parms.iph.saddr &&
+		    !(local == cand->parms.iph.daddr && MULTICAST(local)))
+			continue;
+		if (cand->parms.i_key == key && (cand->dev->flags&IFF_UP))
+			return cand;
+	}
+
+	/* Wildcard tunnels: only the key has to agree. */
+	for (cand = tunnels_wc[key_hash]; cand != NULL; cand = cand->next) {
+		if (cand->parms.i_key == key && (cand->dev->flags&IFF_UP))
+			return cand;
+	}
+
+	return (ipgre_fb_tunnel_dev.flags&IFF_UP) ? &ipgre_fb_tunnel : NULL;
+}
+
+/*
+ * Find the tunnel whose (saddr, daddr, i_key) equal those in parms and,
+ * if none exists and create is non-zero, allocate and register a new one.
+ *
+ * Returns the existing or new tunnel, or NULL when nothing matched and
+ * create is zero, or when allocation, naming or registration failed.
+ * When parms->name is empty a free "gre%d" name (1..99) is chosen and
+ * copied back into parms->name.
+ */
+static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int create)
+{
+ u32 remote = parms->iph.daddr;
+ u32 local = parms->iph.saddr;
+ u32 key = parms->i_key;
+ struct ip_tunnel *t, **tp, *nt;
+ struct device *dev;
+ unsigned h = HASH(key);
+ int prio = 0;
+
+ /* Select the table: bit 0 = local address set, bit 1 = unicast remote set.
+    The remote address takes part in the hash only in the latter case. */
+ if (local)
+ prio |= 1;
+ if (remote && !MULTICAST(remote)) {
+ prio |= 2;
+ h ^= HASH(remote);
+ }
+ /* On a miss the loop ends with t == NULL and tp at the chain's tail link,
+    which is where a new tunnel is appended below. */
+ for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
+ if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
+ if (key == t->parms.i_key)
+ return t;
+ }
+ }
+ if (!create)
+ return NULL;
+
+ MOD_INC_USE_COUNT;
+ /* One allocation holds the device followed directly by the tunnel state. */
+ dev = kmalloc(sizeof(*dev) + sizeof(*t), GFP_KERNEL);
+ if (dev == NULL) {
+ MOD_DEC_USE_COUNT;
+ return NULL;
+ }
+ memset(dev, 0, sizeof(*dev) + sizeof(*t));
+ dev->priv = (void*)(dev+1);
+ nt = (struct ip_tunnel*)dev->priv;
+ nt->dev = dev;
+ /* The device name aliases the name buffer inside the tunnel parameters. */
+ dev->name = nt->parms.name;
+ dev->init = ipgre_tunnel_init;
+ memcpy(&nt->parms, parms, sizeof(*parms));
+ if (dev->name[0] == 0) {
+ int i;
+ for (i=1; i<100; i++) {
+ sprintf(dev->name, "gre%d", i);
+ if (dev_get(dev->name) == NULL)
+ break;
+ }
+ if (i==100)
+ goto failed;
+ memcpy(parms->name, dev->name, IFNAMSIZ);
+ }
+ if (register_netdevice(dev) < 0)
+ goto failed;
+
+ /* Link the new tunnel into its chain with bottom halves held off. */
+ start_bh_atomic();
+ nt->next = t;
+ *tp = nt;
+ end_bh_atomic();
+ /* Do not decrement MOD_USE_COUNT here. */
+ return nt;
+
+failed:
+ kfree(dev);
+ MOD_DEC_USE_COUNT;
+ return NULL;
+}
+
+/*
+ * Unlink the tunnel behind dev from its hash chain. For every device
+ * except the static fallback one, the device memory is freed and the
+ * module use count dropped. Nothing happens if the tunnel is not found
+ * in the chain selected by its own parameters.
+ */
+static void ipgre_tunnel_destroy(struct device *dev)
+{
+	struct ip_tunnel *victim = (struct ip_tunnel*)dev->priv;
+	u32 daddr = victim->parms.iph.daddr;
+	u32 saddr = victim->parms.iph.saddr;
+	unsigned bucket = HASH(victim->parms.i_key);
+	int table = 0;
+	struct ip_tunnel **link;
+
+	/* Recompute the table and bucket the tunnel was inserted under. */
+	if (saddr)
+		table |= 1;
+	if (daddr && !MULTICAST(daddr)) {
+		table |= 2;
+		bucket ^= HASH(daddr);
+	}
+
+	/* Walk the chain until the link pointing at the victim is found. */
+	link = &tunnels[table][bucket];
+	while (*link != NULL && *link != victim)
+		link = &(*link)->next;
+	if (*link == NULL)
+		return;
+
+	/* Read victim->next before the memory holding it may be freed. */
+	*link = victim->next;
+	if (dev != &ipgre_fb_tunnel_dev) {
+		kfree(dev);
+		MOD_DEC_USE_COUNT;
+	}
+}
+
+
+/*
+ * ICMP error handler for GRE.
+ *
+ * skb is the ICMP message (type and code are read from skb->h.icmph),
+ * dp points at the outer IP header quoted inside it, and len is checked
+ * against the IP + GRE header length needed from dp.
+ *
+ * In the default build (I_WISH_WORLD_WERE_PERFECT undefined) nothing is
+ * relayed: relevant errors only update the soft error state
+ * (err_count, err_time) of the matching tunnel.
+ */
+void ipgre_err(struct sk_buff *skb, unsigned char *dp, int len)
+{
+#ifndef I_WISH_WORLD_WERE_PERFECT
+
+/* It is not :-( All the routers (except for Linux) return only
+   8 bytes of packet payload. It means, that precise relaying of
+   ICMP in the real Internet is absolutely infeasible.
+
+   Moreover, Cisco "wise men" put GRE key to the third word
+   in GRE header. It makes impossible maintaining even soft state for keyed
+   GRE tunnels with enabled checksum. Tell them "thank you".
+
+   Well, I wonder, rfc1812 was written by Cisco employee,
+   what the hell these idiots break standards established
+   by themselves???
+ */
+
+	struct iphdr *iph = (struct iphdr*)dp;
+	u16 *p = (u16*)(dp+(iph->ihl<<2));
+	/* Bytes from dp up to and including the last GRE word we need. */
+	int grehlen = (iph->ihl<<2) + 4;
+	int type = skb->h.icmph->type;
+	int code = skb->h.icmph->code;
+	struct ip_tunnel *t;
+	u16 flags;
+
+	flags = p[0];
+	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
+		if (flags&(GRE_VERSION|GRE_ROUTING))
+			return;
+		/* Only the key is needed here; the checksum word matters
+		   only because it precedes the key. */
+		if (flags&GRE_KEY) {
+			grehlen += 4;
+			if (flags&GRE_CSUM)
+				grehlen += 4;
+		}
+	}
+
+	/* If only 8 bytes returned, keyed message will be dropped here */
+	if (len < grehlen)
+		return;
+
+	switch (type) {
+	default:
+	case ICMP_PARAMETERPROB:
+		return;
+
+	case ICMP_DEST_UNREACH:
+		switch (code) {
+		case ICMP_SR_FAILED:
+		case ICMP_PORT_UNREACH:
+			/* Impossible event. */
+			return;
+		case ICMP_FRAG_NEEDED:
+			/* Soft state for pmtu is maintained by IP core. */
+			return;
+		default:
+			/* All others are translated to HOST_UNREACH.
+			   rfc2003 contains "deep thoughts" about NET_UNREACH,
+			   I believe they are just ether pollution. --ANK
+			 */
+			break;
+		}
+		break;
+	case ICMP_TIME_EXCEEDED:
+		if (code != ICMP_EXC_TTL)
+			return;
+		break;
+	}
+
+	/* grehlen is measured from dp (it includes the IP header), so the
+	   key, being the last word counted, sits at dp + grehlen - 4.
+	   Indexing from p with grehlen would overshoot by the IP header
+	   length and read past the bytes validated above. */
+	t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(u32*)(dp + grehlen - 4) : 0);
+	if (t == NULL || t->parms.iph.daddr == 0 || MULTICAST(t->parms.iph.daddr))
+		return;
+
+	/* With ttl inherited from the payload, TIME_EXCEEDED says nothing
+	   about the tunnel itself. */
+	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
+		return;
+
+	/* Count errors arriving within the timeout window; otherwise restart. */
+	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
+		t->err_count++;
+	else
+		t->err_count = 1;
+	t->err_time = jiffies;
+	return;
+#else
+	/* NOTE(review): this relay path is compiled only when
+	   I_WISH_WORLD_WERE_PERFECT is defined. Its logic is reproduced
+	   unchanged; review it before ever enabling it. */
+	struct iphdr *iph = (struct iphdr*)dp;
+	struct iphdr *eiph;
+	u16 *p = (u16*)(dp+(iph->ihl<<2));
+	int type = skb->h.icmph->type;
+	int code = skb->h.icmph->code;
+	int rel_type = 0;
+	int rel_code = 0;
+	int rel_info = 0;
+	u16 flags;
+	int grehlen = (iph->ihl<<2) + 4;
+	struct sk_buff *skb2;
+	struct rtable *rt;
+
+	if (p[1] != __constant_htons(ETH_P_IP))
+		return;
+
+	flags = p[0];
+	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
+		if (flags&(GRE_VERSION|GRE_ROUTING))
+			return;
+		if (flags&GRE_CSUM)
+			grehlen += 4;
+		if (flags&GRE_KEY)
+			grehlen += 4;
+		if (flags&GRE_SEQ)
+			grehlen += 4;
+	}
+	if (len < grehlen + sizeof(struct iphdr))
+		return;
+	eiph = (struct iphdr*)(dp + grehlen);
+
+	switch (type) {
+	default:
+		return;
+	case ICMP_PARAMETERPROB:
+		if (skb->h.icmph->un.gateway < (iph->ihl<<2))
+			return;
+
+		/* So... This guy found something strange INSIDE encapsulated
+		   packet. Well, he is fool, but what can we do ?
+		 */
+		rel_type = ICMP_PARAMETERPROB;
+		rel_info = skb->h.icmph->un.gateway - grehlen;
+		break;
+
+	case ICMP_DEST_UNREACH:
+		switch (code) {
+		case ICMP_SR_FAILED:
+		case ICMP_PORT_UNREACH:
+			/* Impossible event. */
+			return;
+		case ICMP_FRAG_NEEDED:
+			/* And it is the only really necessary thing :-) */
+			rel_info = ntohs(skb->h.icmph->un.frag.mtu);
+			if (rel_info < grehlen+68)
+				return;
+			rel_info -= grehlen;
+			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
+			if (rel_info > ntohs(eiph->tot_len))
+				return;
+			break;
+		default:
+			/* All others are translated to HOST_UNREACH.
+			   rfc2003 contains "deep thoughts" about NET_UNREACH,
+			   I believe, it is just ether pollution. --ANK
+			 */
+			rel_type = ICMP_DEST_UNREACH;
+			rel_code = ICMP_HOST_UNREACH;
+			break;
+		}
+		break;
+	case ICMP_TIME_EXCEEDED:
+		if (code != ICMP_EXC_TTL)
+			return;
+		break;
+	}
+
+	/* Prepare fake skb to feed it to icmp_send */
+	skb2 = skb_clone(skb, GFP_ATOMIC);
+	if (skb2 == NULL)
+		return;
+	dst_release(skb2->dst);
+	skb2->dst = NULL;
+	/* NOTE(review): the operands of this difference look swapped if eiph
+	   lies after skb->data -- confirm. */
+	skb_pull(skb2, skb->data - (u8*)eiph);
+	skb2->nh.raw = skb2->data;
+
+	/* Try to guess incoming interface */
+	if (ip_route_output(&rt, eiph->saddr, 0, RT_TOS(eiph->tos), 0)) {
+		kfree_skb(skb2, FREE_WRITE);
+		return;
+	}
+	skb2->dev = rt->u.dst.dev;
+
+	/* route "incoming" packet */
+	if (rt->rt_flags&RTCF_LOCAL) {
+		ip_rt_put(rt);
+		rt = NULL;
+		/* NOTE(review): if the lookup fails, rt may still be NULL when
+		   passed to ip_rt_put() below -- confirm that is tolerated. */
+		if (ip_route_output(&rt, eiph->daddr, eiph->saddr, eiph->tos, 0) ||
+		    rt->u.dst.dev->type != ARPHRD_IPGRE) {
+			ip_rt_put(rt);
+			kfree_skb(skb2, FREE_WRITE);
+			return;
+		}
+	} else {
+		ip_rt_put(rt);
+		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
+		    skb2->dst->dev->type != ARPHRD_IPGRE) {
+			kfree_skb(skb2, FREE_WRITE);
+			return;
+		}
+	}
+
+	/* change mtu on this route */
+	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
+		if (rel_info > skb2->dst->pmtu) {
+			kfree_skb(skb2, FREE_WRITE);
+			return;
+		}
+		skb2->dst->pmtu = rel_info;
+		rel_info = htonl(rel_info);
+	} else if (type == ICMP_TIME_EXCEEDED) {
+		struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
+		if (t->parms.iph.ttl) {
+			rel_type = ICMP_DEST_UNREACH;
+			rel_code = ICMP_HOST_UNREACH;
+		}
+	}
+
+	icmp_send(skb2, rel_type, rel_code, rel_info);
+	kfree_skb(skb2, FREE_WRITE);
+#endif
+}
+
+/*
+ * Receive handler for GRE packets.
+ *
+ * Parses the GRE header at skb->h.raw (len bytes are checksummed when
+ * GRE_CSUM is set), looks up the tunnel by outer source, outer
+ * destination and key, strips the GRE header and hands the payload to
+ * netif_rx() on the tunnel device. Packets with a non-zero version or a
+ * routing header are dropped. If no tunnel matches, an ICMP protocol
+ * unreachable is sent and the packet is freed. Always returns 0.
+ */
+int ipgre_rcv(struct sk_buff *skb, unsigned short len)
+{
+	struct iphdr *iph = skb->nh.iph;
+	u8 *h = skb->h.raw;
+	u16 flags = *(u16*)h;
+	u16 csum = 0;
+	u32 key = 0;
+	u32 seqno = 0;
+	struct ip_tunnel *tunnel;
+	/* Running size of the GRE header: 4 fixed bytes plus options. */
+	int offset = 4;
+
+	if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
+		/* - Version must be 0.
+		   - We do not support routing headers.
+		 */
+		if (flags&(GRE_VERSION|GRE_ROUTING))
+			goto drop;
+
+		if (flags&GRE_CSUM) {
+			/* A valid packet sums to zero; checked after lookup. */
+			csum = ip_compute_csum(h, len);
+			offset += 4;
+		}
+		if (flags&GRE_KEY) {
+			/* Kept in network byte order, as i_key is compared raw. */
+			key = *(u32*)(h + offset);
+			offset += 4;
+		}
+		if (flags&GRE_SEQ) {
+			seqno = ntohl(*(u32*)(h + offset));
+			offset += 4;
+		}
+	}
+
+	if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) {
+		skb->nh.raw = skb_pull(skb, h + offset - skb->data);
+		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
+		skb->ip_summed = 0;
+		/* Payload protocol is the second 16-bit word of the GRE header. */
+		skb->protocol = *(u16*)(h + 2);
+		skb->pkt_type = PACKET_HOST;
+#ifdef CONFIG_NET_IPGRE_BROADCAST
+		if (MULTICAST(iph->daddr)) {
+			/* Looped back packet, drop it! */
+			if (((struct rtable*)skb->dst)->key.iif == 0)
+				goto drop;
+			tunnel->stat.multicast++;
+			skb->pkt_type = PACKET_BROADCAST;
+		}
+#endif
+
+		/* Bad checksum, or checksum required by the tunnel but absent. */
+		if (((flags&GRE_CSUM) && csum) ||
+		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
+			tunnel->stat.rx_crc_errors++;
+			tunnel->stat.rx_errors++;
+			goto drop;
+		}
+		if (tunnel->parms.i_flags&GRE_SEQ) {
+			/* Both operands are unsigned 32-bit, so the difference
+			   must be cast to signed before testing for "behind";
+			   otherwise the test can never be true and stale
+			   sequence numbers are never rejected. */
+			if (!(flags&GRE_SEQ) ||
+			    (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
+				tunnel->stat.rx_fifo_errors++;
+				tunnel->stat.rx_errors++;
+				goto drop;
+			}
+			tunnel->i_seqno = seqno + 1;
+		}
+		tunnel->stat.rx_packets++;
+		tunnel->stat.rx_bytes += skb->len;
+		skb->dev = tunnel->dev;
+		dst_release(skb->dst);
+		skb->dst = NULL;
+		netif_rx(skb);
+		return(0);
+	}
+	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0);
+
+drop:
+	kfree_skb(skb, FREE_READ);
+	return(0);
+}
+
+/*
+ * Transmit routine of a GRE tunnel device.
+ *
+ * Picks the outer destination, routes the outer packet, applies path MTU
+ * handling, prepends an outer IP header plus GRE header to skb and passes
+ * the result to ip_send(). On every path the skb is either sent or freed
+ * and 0 is returned; failures are reported only through the device
+ * statistics and, where applicable, ICMP/ICMPv6 errors to the sender.
+ */
+static int ipgre_tunnel_xmit(struct sk_buff *skb, struct device *dev)
+{
+ struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+ struct net_device_stats *stats = &tunnel->stat;
+ struct iphdr *old_iph = skb->nh.iph;
+ struct iphdr *tiph;
+ u8 tos;
+ u16 df;
+ struct rtable *rt; /* Route to the other host */
+ struct device *tdev; /* Device to other host */
+ struct iphdr *iph; /* Our new IP header */
+ int max_headroom; /* The extra header space needed */
+ int gre_hlen;
+ u32 dst;
+ int mtu;
+
+ /* Re-entry while a transmit on this tunnel is in progress means a local
+    routing loop: count it and drop. tx_error undoes the increment. */
+ if (tunnel->recursion++) {
+ tunnel->stat.collisions++;
+ goto tx_error;
+ }
+
+ /* With a hard_header method the template IP header is taken from the
+    start of skb->data and no further header space is pushed here;
+    otherwise the tunnel's configured header and length are used. */
+ if (dev->hard_header) {
+ gre_hlen = 0;
+ tiph = (struct iphdr*)skb->data;
+ } else {
+ gre_hlen = tunnel->hlen;
+ tiph = &tunnel->parms.iph;
+ }
+
+ if ((dst = tiph->daddr) == 0) {
+ /* NBMA tunnel */
+
+ if (skb->dst == NULL) {
+ tunnel->stat.tx_fifo_errors++;
+ goto tx_error;
+ }
+
+ /* IPv4 payload: the outer destination is the route's gateway. */
+ if (skb->protocol == __constant_htons(ETH_P_IP)) {
+ rt = (struct rtable*)skb->dst;
+ if ((dst = rt->rt_gateway) == 0)
+ goto tx_error_icmp;
+ }
+#ifdef CONFIG_IPV6
+ /* IPv6 payload: use the IPv4 address embedded in an IPv4-compatible
+    neighbour address, or in the packet destination if that is unset. */
+ else if (skb->protocol == __constant_htons(ETH_P_IPV6)) {
+ struct in6_addr *addr6;
+ int addr_type;
+ struct nd_neigh *neigh = (struct nd_neigh *) skb->dst->neighbour;
+
+ if (neigh == NULL)
+ goto tx_error;
+
+ addr6 = &neigh->ndn_addr;
+ addr_type = ipv6_addr_type(addr6);
+
+ if (addr_type == IPV6_ADDR_ANY) {
+ addr6 = &skb->nh.ipv6h->daddr;
+ addr_type = ipv6_addr_type(addr6);
+ }
+
+ if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
+ goto tx_error_icmp;
+
+ dst = addr6->s6_addr32[3];
+ }
+#endif
+ else
+ goto tx_error;
+ }
+
+ /* Low bit of the configured tos means "inherit from an IPv4 payload";
+    the bit itself is always cleared before use. */
+ tos = tiph->tos;
+ if (tos&1) {
+ if (skb->protocol == __constant_htons(ETH_P_IP))
+ tos = old_iph->tos;
+ tos &= ~1;
+ }
+
+ if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) {
+ tunnel->stat.tx_carrier_errors++;
+ goto tx_error;
+ }
+ tdev = rt->u.dst.dev;
+
+ /* The outer route points back at this tunnel: refuse to loop. */
+ if (tdev == dev) {
+ ip_rt_put(rt);
+ tunnel->stat.collisions++;
+ goto tx_error;
+ }
+
+ df = tiph->frag_off;
+ /* Payload MTU is the outer path MTU minus the tunnel header length. */
+ mtu = rt->u.dst.pmtu - tunnel->hlen;
+
+ if (skb->protocol == __constant_htons(ETH_P_IP)) {
+ if (skb->dst && mtu < skb->dst->pmtu && mtu >= 68)
+ skb->dst->pmtu = mtu;
+
+ /* Carry the payload's DF bit over into the outer header. */
+ df |= (old_iph->frag_off&__constant_htons(IP_DF));
+
+ if ((old_iph->frag_off&__constant_htons(IP_DF)) &&
+ mtu < ntohs(old_iph->tot_len)) {
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+ ip_rt_put(rt);
+ goto tx_error;
+ }
+ }
+#ifdef CONFIG_IPV6
+ else if (skb->protocol == __constant_htons(ETH_P_IPV6)) {
+ struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
+
+ if (rt6 && mtu < rt6->u.dst.pmtu && mtu >= 576) {
+ if ((tunnel->parms.iph.daddr && !MULTICAST(tunnel->parms.iph.daddr)) ||
+ rt6->rt6i_dst.plen == 128) {
+ rt6->rt6i_flags |= RTF_MODIFIED;
+ skb->dst->pmtu = mtu;
+ }
+ }
+
+ if (mtu >= 576 && mtu < skb->len - tunnel->hlen + gre_hlen) {
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
+ ip_rt_put(rt);
+ goto tx_error;
+ }
+ }
+#endif
+
+ /* While recent ICMP errors are recorded for this tunnel, report host
+    unreachable to the sender, one report per recorded error. The packet
+    is still transmitted afterwards. */
+ if (tunnel->err_count > 0) {
+ if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
+ tunnel->err_count--;
+
+ if (skb->protocol == __constant_htons(ETH_P_IP))
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
+#ifdef CONFIG_IPV6
+ else if (skb->protocol == __constant_htons(ETH_P_IPV6))
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, dev);
+#endif
+ } else
+ tunnel->err_count = 0;
+ }
+
+ skb->h.raw = skb->nh.raw;
+
+ /* Link-layer header of the output device, rounded up to 16, plus ours. */
+ max_headroom = ((tdev->hard_header_len+15)&~15)+ gre_hlen;
+
+ /* A private buffer with enough headroom is needed before writing. */
+ if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
+ struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
+ if (!new_skb) {
+ ip_rt_put(rt);
+ stats->tx_dropped++;
+ dev_kfree_skb(skb, FREE_WRITE);
+ tunnel->recursion--;
+ return 0;
+ }
+ dev_kfree_skb(skb, FREE_WRITE);
+ skb = new_skb;
+ }
+
+ skb->nh.raw = skb_push(skb, gre_hlen);
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+ dst_release(skb->dst);
+ skb->dst = &rt->u.dst;
+
+ /*
+ * Fill in the outer IP header, followed by the GRE header.
+ */
+
+ iph = skb->nh.iph;
+ iph->version = 4;
+ iph->ihl = sizeof(struct iphdr) >> 2;
+ iph->frag_off = df;
+ iph->protocol = IPPROTO_GRE;
+ iph->tos = tos;
+ iph->daddr = rt->rt_dst;
+ iph->saddr = rt->rt_src;
+
+ /* A configured ttl of 0 means: inherit from the payload, or use the
+    default TTL when the payload is neither IPv4 nor IPv6. */
+ if ((iph->ttl = tiph->ttl) == 0) {
+ if (skb->protocol == __constant_htons(ETH_P_IP))
+ iph->ttl = old_iph->ttl;
+#ifdef CONFIG_IPV6
+ else if (skb->protocol == __constant_htons(ETH_P_IPV6))
+ iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
+#endif
+ else
+ iph->ttl = ip_statistics.IpDefaultTTL;
+ }
+
+ /* Fixed part of the GRE header: flags word, then payload protocol. */
+ ((u16*)(iph+1))[0] = tunnel->parms.o_flags;
+ ((u16*)(iph+1))[1] = skb->protocol;
+
+ /* Optional words are written back to front, starting at the last word
+    of the header: sequence number, then key, then checksum. */
+ if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
+ u32 *ptr = (u32*)(((u8*)iph) + tunnel->hlen - 4);
+
+ if (tunnel->parms.o_flags&GRE_SEQ) {
+ ++tunnel->o_seqno;
+ *ptr = htonl(tunnel->o_seqno);
+ ptr--;
+ }
+ if (tunnel->parms.o_flags&GRE_KEY) {
+ *ptr = tunnel->parms.o_key;
+ ptr--;
+ }
+ if (tunnel->parms.o_flags&GRE_CSUM) {
+ /* Zero the word first so the sum is taken over a cleared field. */
+ *ptr = 0;
+ *(__u16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
+ }
+ }
+
+ iph->tot_len = htons(skb->len);
+ iph->id = htons(ip_id_count++);
+ ip_send_check(iph);
+
+ stats->tx_bytes += skb->len;
+ stats->tx_packets++;
+ ip_send(skb);
+ tunnel->recursion--;
+ return 0;
+
+/* Error exit that first tells the sender the destination is unreachable;
+   falls through into tx_error. */
+tx_error_icmp:
+ if (skb->protocol == __constant_htons(ETH_P_IP))
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
+#ifdef CONFIG_IPV6
+ else if (skb->protocol == __constant_htons(ETH_P_IPV6))
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, dev);
+#endif
+
+/* Common error exit: count the error, free the skb, release the guard. */
+tx_error:
+ stats->tx_errors++;
+ dev_kfree_skb(skb, FREE_WRITE);
+ tunnel->recursion--;
+ return 0;
+}
+
+/*
+ *	Tunnel configuration ioctl.
+ *
+ *	SIOCGETTUNNEL	copy the parameters of a tunnel to user space.
+ *	SIOCADDTUNNEL	find, or create, the tunnel described by the caller.
+ *	SIOCCHGTUNNEL	update ttl, tos and frag_off of an existing tunnel.
+ *	SIOCDELTUNNEL	unregister a tunnel device.
+ *
+ *	When GET or DEL is issued on the fallback device, the tunnel to act
+ *	on is selected by the ip_tunnel_parm passed in by the caller.
+ *
+ *	Returns 0 on success or a negative errno.  The module use count is
+ *	raised for the duration of the call.
+ */
+static int
+ipgre_tunnel_ioctl (struct device *dev, struct ifreq *ifr, int cmd)
+{
+	int err = 0;
+	struct ip_tunnel_parm p;
+	struct ip_tunnel *t;
+
+	MOD_INC_USE_COUNT;
+
+	switch (cmd) {
+	case SIOCGETTUNNEL:
+		t = NULL;
+		if (dev == &ipgre_fb_tunnel_dev) {
+			/* The lookup key must come from the caller; without
+			   this copy "p" would be uninitialised stack. */
+			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+				err = -EFAULT;
+				break;
+			}
+			t = ipgre_tunnel_locate(&p, 0);
+		}
+		/* No match (or not the fallback device): report this device. */
+		if (t == NULL)
+			t = (struct ip_tunnel*)dev->priv;
+		memcpy(&p, &t->parms, sizeof(p));
+		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+			err = -EFAULT;
+		break;
+
+	case SIOCADDTUNNEL:
+	case SIOCCHGTUNNEL:
+		err = -EFAULT;
+		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+			goto done;
+
+		/* Only a plain IPv4/GRE template is accepted: no IP options,
+		   no frag_off bits except DF, no GRE version or routing. */
+		err = -EINVAL;
+		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
+		    p.iph.ihl != 5 || (p.iph.frag_off&__constant_htons(~IP_DF)) ||
+		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
+			goto done;
+		/* A fixed TTL forces DF on the outer header. */
+		if (p.iph.ttl)
+			p.iph.frag_off |= __constant_htons(IP_DF);
+
+		/* Keys are meaningful only when the KEY flag is set. */
+		if (!(p.i_flags&GRE_KEY))
+			p.i_key = 0;
+		if (!(p.o_flags&GRE_KEY))
+			p.o_key = 0;
+
+		t = ipgre_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
+
+		if (t) {
+			err = 0;
+			if (cmd == SIOCCHGTUNNEL) {
+				t->parms.iph.ttl = p.iph.ttl;
+				t->parms.iph.tos = p.iph.tos;
+				t->parms.iph.frag_off = p.iph.frag_off;
+			}
+			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
+				err = -EFAULT;
+		} else
+			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
+		break;
+
+	case SIOCDELTUNNEL:
+		if (dev == &ipgre_fb_tunnel_dev) {
+			err = -EFAULT;
+			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+				goto done;
+			err = -ENOENT;
+			if ((t = ipgre_tunnel_locate(&p, 0)) == NULL)
+				goto done;
+			/* The fallback tunnel itself cannot be deleted. */
+			err = -EPERM;
+			if (t == &ipgre_fb_tunnel)
+				goto done;
+			/* Delete the tunnel that was looked up, not the
+			   fallback device the request arrived on. */
+			dev = t->dev;
+		}
+		err = unregister_netdevice(dev);
+		break;
+
+	default:
+		err = -EINVAL;
+	}
+
+done:
+	MOD_DEC_USE_COUNT;
+	return err;
+}
+
+/* Return the statistics block kept in the tunnel's private data. */
+static struct net_device_stats *ipgre_tunnel_get_stats(struct device *dev)
+{
+	struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+
+	return &tunnel->stat;
+}
+
+/*
+ *	Set a new device MTU.  Values below 68, or above 0xFFF8 minus the
+ *	precalculated tunnel header length, are rejected with -EINVAL.
+ */
+static int ipgre_tunnel_change_mtu(struct device *dev, int new_mtu)
+{
+	struct ip_tunnel *priv = (struct ip_tunnel*)dev->priv;
+
+	if (new_mtu < 68)
+		return -EINVAL;
+	if (new_mtu > 0xFFF8 - priv->hlen)
+		return -EINVAL;
+
+	dev->mtu = new_mtu;
+	return 0;
+}
+
+#ifdef CONFIG_NET_IPGRE_BROADCAST
+/* Nice toy. Unfortunately, useless in real life :-)
+ It allows to construct virtual multiprotocol broadcast "LAN"
+ over the Internet, provided multicast routing is tuned.
+
+
+ I have no idea whether this bicycle was invented before me,
+ so I had to set ARPHRD_IPGRE to a random value.
+ I have the impression that Cisco could do something similar,
+ but this feature is apparently missing in IOS<=11.2(8).
+
+ I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
+ with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
+
+ ping -t 255 224.66.66.66
+
+ If nobody answers, mbone does not work.
+
+ ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
+ ip addr add 10.66.66.<somewhat>/24 dev Universe
+ ifconfig Universe up
+ ifconfig Universe add fe80::<Your_real_addr>/10
+ ifconfig Universe add fc80:6666:6666::<Your_real_addr>/96
+ ftp 10.66.66.66
+ ...
+ ftp fc80:6666:6666::193.233.7.65
+ ...
+
+ */
+
+/*
+ * Build the "hardware" header of a broadcast GRE device: push t->hlen
+ * bytes in front of the packet, fill them from the tunnel's template IP
+ * header and write the GRE flags and protocol type words after it.
+ *
+ * saddr/daddr, when given, are 4-byte addresses that override the
+ * template's source/destination.
+ *
+ * Returns t->hlen when the destination is known (an explicit daddr, or a
+ * configured non-multicast destination) and -t->hlen otherwise.
+ * NOTE(review): the negative value presumably tells the caller that the
+ * header still needs address resolution -- confirm against the callers
+ * of dev->hard_header.
+ */
+static int ipgre_header(struct sk_buff *skb, struct device *dev, unsigned short type,
+ void *daddr, void *saddr, unsigned len)
+{
+ struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
+ struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
+ u16 *p = (u16*)(iph+1);
+
+ memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
+ p[0] = t->parms.o_flags;
+ p[1] = htons(type);
+
+ /*
+ * Set the source hardware address.
+ */
+
+ if (saddr)
+ memcpy(&iph->saddr, saddr, 4);
+
+ if (daddr) {
+ memcpy(&iph->daddr, daddr, 4);
+ return t->hlen;
+ }
+ /* No explicit destination: the template address is usable as is
+    only when it is set and is not a multicast group. */
+ if (iph->daddr && !MULTICAST(iph->daddr))
+ return t->hlen;
+
+ return -t->hlen;
+}
+
+/*
+ *	Resolve the outer destination address of a packet whose header was
+ *	built by ipgre_header().  The neighbour attached to the route is
+ *	preferred; without one only IPv4 payloads can be resolved (via ARP).
+ *	Any other payload type is logged (rate limited) and 0 is returned.
+ */
+static int ipgre_rebuild_header(struct sk_buff *skb)
+{
+	struct iphdr *outer = (struct iphdr *)skb->data;
+	u16 *gre = (u16*)(outer + 1);
+	void *target = (void*)&outer->daddr;
+
+	if (skb->dst && skb->dst->neighbour)
+		return skb->dst->neighbour->ops->resolve(target, skb);
+
+	if (gre[1] != __constant_htons(ETH_P_IP)) {
+		if (net_ratelimit())
+			printk(KERN_DEBUG "%s: unable to resolve type %X addresses.\n",
+			       skb->dev->name, (int)gre[1]);
+		return 0;
+	}
+
+	return arp_find(target, skb);
+}
+
+/*
+ *	Bring a broadcast GRE device up.  For a multicast destination the
+ *	output device is found by a route lookup, its ifindex is remembered
+ *	in t->mlink and the group is joined on it.  Fails with
+ *	-EADDRNOTAVAIL when there is no route or the output device has no
+ *	ip_ptr.  The module use count stays raised on success.
+ */
+static int ipgre_open(struct device *dev)
+{
+	struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+	struct device *mdev;
+	struct rtable *rt;
+
+	MOD_INC_USE_COUNT;
+
+	/* Nothing to join for a non-multicast destination. */
+	if (!MULTICAST(tunnel->parms.iph.daddr))
+		return 0;
+
+	if (ip_route_output(&rt, tunnel->parms.iph.daddr,
+			    tunnel->parms.iph.saddr, RT_TOS(tunnel->parms.iph.tos),
+			    tunnel->parms.link))
+		goto no_addr;
+
+	mdev = rt->u.dst.dev;
+	ip_rt_put(rt);
+	if (mdev->ip_ptr == NULL)
+		goto no_addr;
+
+	tunnel->mlink = mdev->ifindex;
+	ip_mc_inc_group(mdev->ip_ptr, tunnel->parms.iph.daddr);
+	return 0;
+
+no_addr:
+	MOD_DEC_USE_COUNT;
+	return -EADDRNOTAVAIL;
+}
+
+/*
+ *	Take a broadcast GRE device down: leave the multicast group on the
+ *	device recorded in t->mlink (if it still exists and has an ip_ptr)
+ *	and drop the module use count.
+ */
+static int ipgre_close(struct device *dev)
+{
+	struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+
+	if (MULTICAST(tunnel->parms.iph.daddr) && tunnel->mlink) {
+		struct device *mdev = dev_get_by_index(tunnel->mlink);
+
+		if (mdev && mdev->ip_ptr)
+			ip_mc_dec_group(mdev->ip_ptr, tunnel->parms.iph.daddr);
+	}
+
+	MOD_DEC_USE_COUNT;
+	return 0;
+}
+
+#endif
+
+/*
+ *	Initialisation shared by the fallback device and by ordinary GRE
+ *	tunnel devices: install the driver methods and set the defaults for
+ *	a keyless tunnel (IP header plus 4 bytes of GRE header).
+ */
+static void ipgre_tunnel_init_gen(struct device *dev)
+{
+	struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+
+	/* Driver methods. */
+	dev->hard_start_xmit	= ipgre_tunnel_xmit;
+	dev->do_ioctl		= ipgre_tunnel_ioctl;
+	dev->change_mtu		= ipgre_tunnel_change_mtu;
+	dev->get_stats		= ipgre_tunnel_get_stats;
+	dev->destructor		= ipgre_tunnel_destroy;
+
+	dev_init_buffers(dev);
+
+	/* Link parameters. */
+	dev->type		= ARPHRD_IPGRE;
+	dev->flags		= IFF_NOARP;
+	dev->iflink		= 0;
+	dev->mtu		= 1500 - sizeof(struct iphdr) - 4;
+	dev->hard_header_len	= MAX_HEADER + sizeof(struct iphdr) + 4;
+
+	/* The "hardware" addresses are the tunnel endpoints. */
+	dev->addr_len		= 4;
+	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
+	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
+}
+
+/*
+ * dev->init method of an ordinary GRE tunnel device.
+ *
+ * Applies the generic defaults, then tries to find the underlying output
+ * device (by routing to the remote endpoint, else via parms.link) so that
+ * hard_header_len and mtu can be derived from it, and precalculates the
+ * length of the IP + GRE header for the configured output flags.
+ *
+ * Returns 0, or -EINVAL for a multicast destination without a local
+ * address (only checked with CONFIG_NET_IPGRE_BROADCAST).
+ */
+static int ipgre_tunnel_init(struct device *dev)
+{
+ struct device *tdev = NULL;
+ struct ip_tunnel *tunnel;
+ struct iphdr *iph;
+ int hlen = MAX_HEADER;
+ int mtu = 1500;
+ int addend = sizeof(struct iphdr) + 4;
+
+ tunnel = (struct ip_tunnel*)dev->priv;
+ iph = &tunnel->parms.iph;
+
+ ipgre_tunnel_init_gen(dev);
+
+ /* Guess output device to choose reasonable mtu and hard_header_len */
+
+ if (iph->daddr) {
+ struct rtable *rt;
+ if (!ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link)) {
+ tdev = rt->u.dst.dev;
+ ip_rt_put(rt);
+ }
+
+ dev->flags |= IFF_POINTOPOINT;
+
+#ifdef CONFIG_NET_IPGRE_BROADCAST
+ if (MULTICAST(iph->daddr)) {
+ if (!iph->saddr)
+ return -EINVAL;
+ /* Plain assignment: this replaces the IFF_NOARP and
+    IFF_POINTOPOINT flags set above. */
+ dev->flags = IFF_BROADCAST;
+ dev->hard_header = ipgre_header;
+ dev->rebuild_header = ipgre_rebuild_header;
+ dev->open = ipgre_open;
+ dev->stop = ipgre_close;
+ }
+#endif
+ }
+
+ /* No route found: fall back to the explicitly bound device. */
+ if (!tdev && tunnel->parms.link)
+ tdev = dev_get_by_index(tunnel->parms.link);
+
+ if (tdev) {
+ hlen = tdev->hard_header_len;
+ mtu = tdev->mtu;
+ }
+ dev->iflink = tunnel->parms.link;
+
+ /* Precalculate GRE options length */
+ if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
+ if (tunnel->parms.o_flags&GRE_CSUM)
+ addend += 4;
+ if (tunnel->parms.o_flags&GRE_KEY)
+ addend += 4;
+ if (tunnel->parms.o_flags&GRE_SEQ)
+ addend += 4;
+ }
+ /* addend is the full encapsulation overhead: IP header, the 4 byte
+    base GRE header and 4 bytes per enabled option. */
+ dev->hard_header_len = hlen + addend;
+ dev->mtu = mtu - addend;
+ tunnel->hlen = addend;
+ return 0;
+}
+
+#ifdef MODULE
+/* Modular build only: raise the module use count while the fallback
+   device is open. */
+static int ipgre_fb_tunnel_open(struct device *dev)
+{
+ MOD_INC_USE_COUNT;
+ return 0;
+}
+
+/* Modular build only: drop the use count taken by ipgre_fb_tunnel_open(). */
+static int ipgre_fb_tunnel_close(struct device *dev)
+{
+ MOD_DEC_USE_COUNT;
+ return 0;
+}
+#endif
+
+/*
+ * Init method of the fallback GRE device: apply the generic defaults,
+ * fill in the template IP header of the fallback tunnel (IPv4, GRE, no
+ * options), set the header length of a keyless tunnel and publish the
+ * fallback tunnel in the wildcard slot tunnels_wc[0].  Always returns 0.
+ */
+__initfunc(int ipgre_fb_tunnel_init(struct device *dev))
+{
+ struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+ struct iphdr *iph;
+
+ ipgre_tunnel_init_gen(dev);
+#ifdef MODULE
+ dev->open = ipgre_fb_tunnel_open;
+ dev->stop = ipgre_fb_tunnel_close;
+#endif
+
+ iph = &ipgre_fb_tunnel.parms.iph;
+ iph->version = 4;
+ iph->protocol = IPPROTO_GRE;
+ iph->ihl = 5;
+ tunnel->hlen = sizeof(struct iphdr) + 4;
+
+ tunnels_wc[0] = &ipgre_fb_tunnel;
+ return 0;
+}
+
+
+/* Protocol descriptor that hooks the GRE receive and ICMP error handlers
+   into the IPv4 stack; added in the init code and removed in
+   cleanup_module(). */
+static struct inet_protocol ipgre_protocol = {
+ ipgre_rcv, /* GRE handler */
+ ipgre_err, /* TUNNEL error control */
+ 0, /* next */
+ IPPROTO_GRE, /* protocol ID */
+ 0, /* copy */
+ NULL, /* data */
+ "GRE" /* name */
+};
+
+
+/*
+ * And now the modules code and kernel interface.
+ */
+
+/*
+ *	Driver initialisation (init_module() when built as a module,
+ *	ipgre_init() otherwise): register the fallback tunnel device and,
+ *	only if that succeeded, install the GRE protocol handler.
+ *
+ *	Returns 0 on success or the error reported by the registration.
+ */
+#ifdef MODULE
+int init_module(void)
+#else
+__initfunc(int ipgre_init(void))
+#endif
+{
+	int err;
+
+	printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
+
+	ipgre_fb_tunnel_dev.priv = (void*)&ipgre_fb_tunnel;
+	ipgre_fb_tunnel_dev.name = ipgre_fb_tunnel.parms.name;
+#ifdef MODULE
+	err = register_netdev(&ipgre_fb_tunnel_dev);
+#else
+	err = register_netdevice(&ipgre_fb_tunnel_dev);
+#endif
+	/* Do not start receiving GRE packets without a fallback device. */
+	if (err) {
+		printk(KERN_ERR "ipgre init: can't register fallback device\n");
+		return err;
+	}
+
+	inet_add_protocol(&ipgre_protocol);
+	return 0;
+}
+
+#ifdef MODULE
+
+/*
+ *	Module unload: remove the GRE protocol handler (a failure is only
+ *	logged), then unregister the fallback tunnel device.
+ */
+void cleanup_module(void)
+{
+	int rc = inet_del_protocol(&ipgre_protocol);
+
+	if (rc < 0)
+		printk(KERN_INFO "ipgre close: can't remove protocol\n");
+
+	unregister_netdev(&ipgre_fb_tunnel_dev);
+}
+
+#endif
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 09c0bba3b..ad9ee120a 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -1,7 +1,7 @@
/*
* Linux NET3: IP/IP protocol decoder.
*
- * Version: $Id: ipip.c,v 1.15 1997-10-10 22:41:14 davem Exp $
+ * Version: $Id: ipip.c,v 1.16 1997-10-29 20:27:15 kuznet Exp $
*
* Authors:
* Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
@@ -13,6 +13,11 @@
* to keep ip_forward happy.
* Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
* Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL
+ * David Woodhouse : Perform some basic ICMP handling.
+ * IPIP Routing without decapsulation.
+ * Carlos Picoto : GRE over IP support
+ * Alexey Kuznetsov: Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
+ * I do not want to merge them together.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -20,6 +25,73 @@
* 2 of the License, or (at your option) any later version.
*
*/
+
+/* tunnel.c: an IP tunnel driver
+
+ The purpose of this driver is to provide an IP tunnel through
+ which you can tunnel network traffic transparently across subnets.
+
+ This was written by looking at Nick Holloway's dummy driver
+ Thanks for the great code!
+
+ -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
+
+ Minor tweaks:
+ Cleaned up the code a little and added some pre-1.3.0 tweaks.
+ dev->hard_header/hard_header_len changed to use no headers.
+ Comments/bracketing tweaked.
+ Made the tunnels use dev->name not tunnel: when error reporting.
+ Added tx_dropped stat
+
+ -Alan Cox (Alan.Cox@linux.org) 21 March 95
+
+ Reworked:
+ Changed to tunnel to destination gateway in addition to the
+ tunnel's pointopoint address
+ Almost completely rewritten
+ Note: There is currently no firewall or ICMP handling done.
+
+ -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96
+
+*/
+
+/* Things I wish I had known when writing the tunnel driver:
+
+ When the tunnel_xmit() function is called, the skb contains the
+ packet to be sent (plus a great deal of extra info), and dev
+ contains the tunnel device that _we_ are.
+
+ When we are passed a packet, we are expected to fill in the
+ source address with our source IP address.
+
+ What is the proper way to allocate, copy and free a buffer?
+ After you allocate it, it is a "0 length" chunk of memory
+ starting at zero. If you want to add headers to the buffer
+ later, you'll have to call "skb_reserve(skb, amount)" with
+ the amount of memory you want reserved. Then, you call
+ "skb_put(skb, amount)" with the amount of space you want in
+ the buffer. skb_put() returns a pointer to the top (#0) of
+ that buffer. skb->len is set to the amount of space you have
+ "allocated" with skb_put(). You can then write up to skb->len
+ bytes to that buffer. If you need more, you can call skb_put()
+ again with the additional amount of space you need. You can
+ find out how much more space you can allocate by calling
+ "skb_tailroom(skb)".
+ Now, to add header space, call "skb_push(skb, header_len)".
+ This creates space at the beginning of the buffer and returns
+ a pointer to this new space. If later you need to strip a
+ header from a buffer, call "skb_pull(skb, header_len)".
+ skb_headroom() will return how much space is left at the top
+ of the buffer (before the main data). Remember, this headroom
+ space must be reserved before the skb_put() function is called.
+ */
+
+/*
+ This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c.
+
+ For comments, look at net/ipv4/ip_gre.c --ANK
+ */
+
#include <linux/config.h>
#include <linux/module.h>
@@ -34,120 +106,354 @@
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
+#include <linux/init.h>
-#include <net/datalink.h>
#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ipip.h>
-static struct device *ipip_active_tunnels;
-static struct device *ipip_tunnels;
-static struct ipip_tunnel *ipip_fb_tunnel;
+#define HASH_SIZE 16
+/* Fold an address into a 4-bit bucket index.  The argument is
+   parenthesised so that an expression argument (e.g. a|b) hashes the
+   same way as its value. */
+#define HASH(addr) (((addr)^((addr)>>4))&0xF)
-void ipip_err(struct sk_buff *skb, unsigned char *dp, int len)
+/* Init methods, declared early because the device objects refer to them. */
+static int ipip_fb_tunnel_init(struct device *dev);
+static int ipip_tunnel_init(struct device *dev);
+
+/* Statically allocated fallback device.  The initialiser is positional;
+   NOTE(review): ipip_fb_tunnel_init presumably lands in the "init"
+   member of struct device -- confirm against the struct layout. */
+static struct device ipip_fb_tunnel_dev = {
+ NULL, 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, ipip_fb_tunnel_init,
+};
+
+/* Fallback tunnel, bound to the fallback device above and named "tunl0". */
+static struct ip_tunnel ipip_fb_tunnel = {
+ NULL, &ipip_fb_tunnel_dev, {0, }, 0, 0, 0, 0, 0, 0, 0, {"tunl0", }
+};
+
+/* Tunnel hash tables, one per combination of configured endpoints:
+   remote and local, remote only, local only, and the single wildcard
+   slot.  tunnels[] is indexed by the "prio" value computed in
+   ipip_tunnel_locate()/ipip_tunnel_destroy(): 2 is set when the remote
+   address is non-zero, 1 when the local address is non-zero. */
+static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
+static struct ip_tunnel *tunnels_r[HASH_SIZE];
+static struct ip_tunnel *tunnels_l[HASH_SIZE];
+static struct ip_tunnel *tunnels_wc[1];
+static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
+
+/*
+ * Find the tunnel that should handle a packet with the given outer
+ * addresses.  Only tunnels whose device is IFF_UP are considered.  The
+ * tables are searched from most to least specific: exact remote+local
+ * match, remote only, local only, and finally the wildcard slot.
+ * Returns NULL when nothing matches.
+ */
+static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local)
{
- /* NI */
- return;
+ unsigned h0 = HASH(remote);
+ unsigned h1 = HASH(local);
+ struct ip_tunnel *t;
+
+ for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
+ if (local == t->parms.iph.saddr &&
+ remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
+ return t;
+ }
+ for (t = tunnels_r[h0]; t; t = t->next) {
+ if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
+ return t;
+ }
+ for (t = tunnels_l[h1]; t; t = t->next) {
+ if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
+ return t;
+ }
+ if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
+ return t;
+ return NULL;
}
-/*
- * The IPIP protocol driver.
- *
- * On entry here
- * skb->data is the original IP header
- * skb->nh points to the initial IP header.
- * skb->h points at the new header.
- */
-
-int ipip_rcv(struct sk_buff *skb, unsigned short len)
+/*
+ * Find the tunnel configured with exactly the endpoints in parms and,
+ * when "create" is non-zero and none exists, allocate and register a new
+ * tunnel device for it.
+ *
+ * A new device and its ip_tunnel are carved out of a single allocation
+ * (the tunnel follows the device).  If parms carries no name, the first
+ * free "tunl1".."tunl99" is chosen and written back into parms->name.
+ * The module use count is raised for every tunnel created and stays
+ * raised until ipip_tunnel_destroy().
+ *
+ * Returns the tunnel, or NULL when it was not found / could not be
+ * created.
+ */
+struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
{
+ u32 remote = parms->iph.daddr;
+ u32 local = parms->iph.saddr;
+ struct ip_tunnel *t, **tp, *nt;
struct device *dev;
- struct iphdr *iph;
- struct ipip_tunnel *tunnel;
+ unsigned h = 0;
+ int prio = 0;
- /*
- * Discard the original IP header
- */
-
- skb_pull(skb, skb->h.raw - skb->nh.raw);
-
- /*
- * Adjust pointers
- */
-
- iph = skb->nh.iph;
- skb->nh.iph = skb->h.ipiph;
- memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
+ if (remote) {
+ prio |= 2;
+ h ^= HASH(remote);
+ }
+ if (local) {
+ prio |= 1;
+ h ^= HASH(local);
+ }
+ for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
+ if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
+ return t;
+ }
+ if (!create)
+ return NULL;
- /*
- * If you want to add LZ compressed IP or things like that here,
- * and in drivers/net/tunnel.c are the places to add.
- */
-
- skb->protocol = htons(ETH_P_IP);
- skb->ip_summed = 0;
- skb->pkt_type = PACKET_HOST;
+ MOD_INC_USE_COUNT;
+ dev = kmalloc(sizeof(*dev) + sizeof(*t), GFP_KERNEL);
+ if (dev == NULL) {
+ MOD_DEC_USE_COUNT;
+ return NULL;
+ }
+ memset(dev, 0, sizeof(*dev) + sizeof(*t));
+ dev->priv = (void*)(dev+1);
+ nt = (struct ip_tunnel*)dev->priv;
+ nt->dev = dev;
+ dev->name = nt->parms.name;
+ dev->init = ipip_tunnel_init;
+ memcpy(&nt->parms, parms, sizeof(*parms));
+ if (dev->name[0] == 0) {
+ int i;
+ for (i=1; i<100; i++) {
+ sprintf(dev->name, "tunl%d", i);
+ if (dev_get(dev->name) == NULL)
+ break;
+ }
+ if (i==100)
+ goto failed;
+ memcpy(parms->name, dev->name, IFNAMSIZ);
+ }
+ if (register_netdevice(dev) < 0)
+ goto failed;
+
+ /* After the search loop above t is NULL and tp points at the tail
+    link of the bucket, so this appends nt to the chain. */
+ start_bh_atomic();
+ nt->next = t;
+ *tp = nt;
+ end_bh_atomic();
+ /* Do not decrement MOD_USE_COUNT here. */
+ return nt;
+
+failed:
+ kfree(dev);
+ MOD_DEC_USE_COUNT;
+ return NULL;
+}
- tunnel = NULL;
- for (dev=ipip_active_tunnels; dev && (tunnel=(struct ipip_tunnel*)dev->priv);
- dev=tunnel->next_active) {
- if (iph->daddr == tunnel->parms.iph.saddr &&
- iph->saddr == tunnel->parms.iph.daddr &&
- (!tunnel->parms.link || tunnel->parms.link == skb->dev->ifindex))
- break;
+/*
+ * Device destructor.  The static fallback device is only removed from the
+ * wildcard slot.  Any other tunnel is unlinked from the hash chain
+ * selected by its configured endpoints, its memory is freed (device and
+ * tunnel share one allocation) and the module use count taken in
+ * ipip_tunnel_locate() is dropped.
+ */
+static void ipip_tunnel_destroy(struct device *dev)
+{
+ struct ip_tunnel *t, **tp;
+ struct ip_tunnel *t0 = (struct ip_tunnel*)dev->priv;
+ u32 remote = t0->parms.iph.daddr;
+ u32 local = t0->parms.iph.saddr;
+ unsigned h = 0;
+ int prio = 0;
+
+ if (dev == &ipip_fb_tunnel_dev) {
+ tunnels_wc[0] = NULL;
+ return;
}
- if (tunnel == NULL)
- tunnel = ipip_fb_tunnel;
- if (tunnel == NULL) {
- kfree_skb(skb, FREE_READ);
- return -EINVAL;
+
+ if (remote) {
+ prio |= 2;
+ h ^= HASH(remote);
+ }
+ if (local) {
+ prio |= 1;
+ h ^= HASH(local);
+ }
+ for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
+ if (t == t0) {
+ *tp = t->next;
+ kfree(dev);
+ MOD_DEC_USE_COUNT;
+ break;
+ }
}
- tunnel->stat.rx_packets++;
- skb->dev = dev;
- dst_release(skb->dst);
- skb->dst = NULL;
- netif_rx(skb);
- return(0);
}
-/*
- * Okay, this needs to be high enough that we can fit a "standard"
- * ethernet header and an IP tunnel header into the outgoing packet.
- * [36 bytes]
- */
-
-#define TUNL_HLEN (((ETH_HLEN+15)&~15)+sizeof(struct iphdr))
-static int ipip_tunnel_open(struct device *dev)
+/*
+ * ICMP error handler for encapsulated packets; dp points at the IP header
+ * quoted in the ICMP message and len is the number of quoted bytes.
+ *
+ * In the default build (I_WISH_WORLD_WERE_PERFECT not defined) errors are
+ * not relayed.  Unreachable / TTL-exceeded errors only bump the
+ * err_count / err_time soft state of the tunnel matching the quoted
+ * addresses; ipip_tunnel_xmit() later turns that state into ICMP host
+ * unreachable replies to senders.
+ *
+ * The #else branch is the "perfect world" variant that rebuilds an ICMP
+ * error for the inner packet and relays it to the original sender.
+ */
+void ipip_err(struct sk_buff *skb, unsigned char *dp, int len)
{
- MOD_INC_USE_COUNT;
+#ifndef I_WISH_WORLD_WERE_PERFECT
- cli();
- ((struct ipip_tunnel*)dev->priv)->next_active = ipip_active_tunnels;
- ipip_active_tunnels=dev;
- sti();
- return 0;
+/* It is not :-( All the routers (except for Linux) return only
+ 8 bytes of packet payload. It means, that precise relaying of
+ ICMP in the real Internet is absolutely infeasible.
+ */
+ struct iphdr *iph = (struct iphdr*)dp;
+ int type = skb->h.icmph->type;
+ int code = skb->h.icmph->code;
+ struct ip_tunnel *t;
+
+ if (len < sizeof(struct iphdr))
+ return;
+
+ switch (type) {
+ default:
+ case ICMP_PARAMETERPROB:
+ return;
+
+ case ICMP_DEST_UNREACH:
+ switch (code) {
+ case ICMP_SR_FAILED:
+ case ICMP_PORT_UNREACH:
+ /* Impossible event. */
+ return;
+ case ICMP_FRAG_NEEDED:
+ /* Soft state for pmtu is maintained by IP core. */
+ return;
+ default:
+ /* All others are translated to HOST_UNREACH.
+ rfc2003 contains "deep thoughts" about NET_UNREACH,
+ I believe they are just ether pollution. --ANK
+ */
+ break;
+ }
+ break;
+ case ICMP_TIME_EXCEEDED:
+ if (code != ICMP_EXC_TTL)
+ return;
+ break;
+ }
+
+ /* The quoted header is one we sent, so its destination is the
+    tunnel's remote endpoint and its source the local one. */
+ t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
+ if (t == NULL || t->parms.iph.daddr == 0)
+ return;
+ if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
+ return;
+
+ /* Errors arriving within IPTUNNEL_ERR_TIMEO of the previous one
+    accumulate; otherwise the count restarts at 1. */
+ if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
+ t->err_count++;
+ else
+ t->err_count = 1;
+ t->err_time = jiffies;
+ return;
+#else
+ struct iphdr *iph = (struct iphdr*)dp;
+ int hlen = iph->ihl<<2;
+ struct iphdr *eiph;
+ int type = skb->h.icmph->type;
+ int code = skb->h.icmph->code;
+ int rel_type = 0;
+ int rel_code = 0;
+ int rel_info = 0;
+ struct sk_buff *skb2;
+ struct rtable *rt;
+
+ if (len < hlen + sizeof(struct iphdr))
+ return;
+ eiph = (struct iphdr*)(dp + hlen);
+
+ switch (type) {
+ default:
+ return;
+ case ICMP_PARAMETERPROB:
+ if (skb->h.icmph->un.gateway < hlen)
+ return;
+
+ /* So... This guy found something strange INSIDE encapsulated
+ packet. Well, he is fool, but what can we do ?
+ */
+ rel_type = ICMP_PARAMETERPROB;
+ rel_info = skb->h.icmph->un.gateway - hlen;
+ break;
+
+ case ICMP_DEST_UNREACH:
+ switch (code) {
+ case ICMP_SR_FAILED:
+ case ICMP_PORT_UNREACH:
+ /* Impossible event. */
+ return;
+ case ICMP_FRAG_NEEDED:
+ /* And it is the only really necesary thing :-) */
+ rel_info = ntohs(skb->h.icmph->un.frag.mtu);
+ if (rel_info < hlen+68)
+ return;
+ rel_info -= hlen;
+ /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
+ if (rel_info > ntohs(eiph->tot_len))
+ return;
+ break;
+ default:
+ /* All others are translated to HOST_UNREACH.
+ rfc2003 contains "deep thoughts" about NET_UNREACH,
+ I believe, it is just ether pollution. --ANK
+ */
+ rel_type = ICMP_DEST_UNREACH;
+ rel_code = ICMP_HOST_UNREACH;
+ break;
+ }
+ break;
+ case ICMP_TIME_EXCEEDED:
+ if (code != ICMP_EXC_TTL)
+ return;
+ break;
+ }
+
+ /* Prepare fake skb to feed it to icmp_send */
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (skb2 == NULL)
+ return;
+ dst_release(skb2->dst);
+ skb2->dst = NULL;
+ skb_pull(skb2, skb->data - (u8*)eiph);
+ skb2->nh.raw = skb2->data;
+
+ /* Try to guess incoming interface */
+ if (ip_route_output(&rt, eiph->saddr, 0, RT_TOS(eiph->tos), 0)) {
+ kfree_skb(skb2, FREE_WRITE);
+ return;
+ }
+ skb2->dev = rt->u.dst.dev;
+
+ /* route "incoming" packet */
+ /* NOTE(review): the device type tested below is ARPHRD_IPGRE although
+    this file's devices are set up as ARPHRD_TUNNEL -- looks like a
+    leftover from ip_gre.c; confirm before enabling this branch. */
+ if (rt->rt_flags&RTCF_LOCAL) {
+ ip_rt_put(rt);
+ rt = NULL;
+ if (ip_route_output(&rt, eiph->daddr, eiph->saddr, eiph->tos, 0) ||
+ rt->u.dst.dev->type != ARPHRD_IPGRE) {
+ ip_rt_put(rt);
+ kfree_skb(skb2, FREE_WRITE);
+ return;
+ }
+ } else {
+ ip_rt_put(rt);
+ if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
+ skb2->dst->dev->type != ARPHRD_IPGRE) {
+ kfree_skb(skb2, FREE_WRITE);
+ return;
+ }
+ }
+
+ /* change mtu on this route */
+ if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
+ if (rel_info > skb2->dst->pmtu) {
+ kfree_skb(skb2, FREE_WRITE);
+ return;
+ }
+ skb2->dst->pmtu = rel_info;
+ rel_info = htonl(rel_info);
+ } else if (type == ICMP_TIME_EXCEEDED) {
+ struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
+ if (t->parms.iph.ttl) {
+ rel_type = ICMP_DEST_UNREACH;
+ rel_code = ICMP_HOST_UNREACH;
+ }
+ }
+
+ icmp_send(skb2, rel_type, rel_code, rel_info);
+ kfree_skb(skb2, FREE_WRITE);
+ return;
+#endif
}
-static int ipip_tunnel_close(struct device *dev)
+/*
+ * Receive handler for IPPROTO_IPIP.  Strips the outer IP header (the data
+ * is pulled up to skb->h.raw, which becomes the new network header),
+ * looks the tunnel up by the outer source/destination addresses and, if
+ * one is found, accounts the packet and re-injects it through netif_rx()
+ * as received on the tunnel device.  Without a matching tunnel an ICMP
+ * protocol-unreachable is sent and the packet is freed.  Always
+ * returns 0.
+ */
+int ipip_rcv(struct sk_buff *skb, unsigned short len)
{
- struct device **devp;
- struct ipip_tunnel *tunnel;
-
- for (devp=&ipip_active_tunnels;
- *devp && (tunnel=(struct ipip_tunnel*)(*devp)->priv);
- devp=&tunnel->next_active) {
- if (dev == *devp) {
- cli();
- *devp = tunnel->next_active;
- tunnel->next_active = NULL;
- sti();
- }
+ struct iphdr *iph;
+ struct ip_tunnel *tunnel;
+
+ iph = skb->nh.iph;
+ skb->nh.raw = skb_pull(skb, skb->h.raw - skb->data);
+ memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
+ skb->protocol = __constant_htons(ETH_P_IP);
+ skb->ip_summed = 0;
+ skb->pkt_type = PACKET_HOST;
+
+ if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
+ tunnel->stat.rx_packets++;
+ tunnel->stat.rx_bytes += skb->len;
+ skb->dev = tunnel->dev;
+ dst_release(skb->dst);
+ skb->dst = NULL;
+ netif_rx(skb);
+ return 0;
}
- MOD_DEC_USE_COUNT;
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0);
+ kfree_skb(skb, FREE_READ);
return 0;
}
@@ -158,67 +464,98 @@ static int ipip_tunnel_close(struct device *dev)
+/*
+ * Transmit method: encapsulate an IPv4 packet in a new IP header and hand
+ * it to ip_send().
+ *
+ * The outer destination is the configured remote endpoint or, for a
+ * tunnel without one (NBMA), the gateway of the route attached to the
+ * packet.  Packets that would loop back into this device, that are not
+ * IPv4, or that exceed the path MTU with DF set are dropped (the latter
+ * with an ICMP "fragmentation needed").  tunnel->recursion guards against
+ * re-entering the device while a transmit is in progress.
+ *
+ * Always returns 0; the skb is consumed on every path.
+ */
static int ipip_tunnel_xmit(struct sk_buff *skb, struct device *dev)
{
- struct net_device_stats *stats; /* This device's statistics */
- struct ipip_tunnel *tunnel = (struct ipip_tunnel*)dev->priv;
+ struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+ struct net_device_stats *stats = &tunnel->stat;
+ struct iphdr *tiph = &tunnel->parms.iph;
+ u8 tos = tunnel->parms.iph.tos;
+ u16 df = tiph->frag_off;
struct rtable *rt; /* Route to the other host */
struct device *tdev; /* Device to other host */
- struct iphdr *tiph;
+ struct iphdr *old_iph = skb->nh.iph;
struct iphdr *iph; /* Our new IP header */
int max_headroom; /* The extra header space needed */
- u8 tos;
- int iif;
+ u32 dst = tiph->daddr;
+ int mtu;
+
+ if (tunnel->recursion++) {
+ tunnel->stat.collisions++;
+ goto tx_error;
+ }
+
+ if (skb->protocol != __constant_htons(ETH_P_IP))
+ goto tx_error;
- stats = &(tunnel->stat);
- tiph = &(tunnel->parms.iph);
-
- /*
- * First things first. Look up the destination address in the
- * routing tables
- */
- iph = skb->nh.iph;
- tos = tunnel->parms.iph.tos;
if (tos&1)
- tos = iph->tos;
+ tos = old_iph->tos;
- if (ip_route_output(&rt, tiph->daddr, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) {
- /* No route to host */
- printk ( KERN_INFO "%s: Can't reach target gateway!\n", dev->name);
- stats->tx_errors++;
- dev_kfree_skb(skb, FREE_WRITE);
- return 0;
+ if (!dst) {
+ /* NBMA tunnel */
+ if ((rt = (struct rtable*)skb->dst) == NULL) {
+ tunnel->stat.tx_fifo_errors++;
+ goto tx_error;
+ }
+ if ((dst = rt->rt_gateway) == 0)
+ goto tx_error_icmp;
+ }
+
+ if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) {
+ tunnel->stat.tx_carrier_errors++;
+ goto tx_error_icmp;
}
tdev = rt->u.dst.dev;
- if (tdev->type == ARPHRD_TUNNEL) {
- /* Tunnel to tunnel? -- I don't think so. */
- printk ( KERN_INFO "%s: Packet targetted at myself!\n" , dev->name);
+ if (tdev == dev) {
ip_rt_put(rt);
- stats->tx_errors++;
- dev_kfree_skb(skb, FREE_WRITE);
- return 0;
+ tunnel->stat.collisions++;
+ goto tx_error;
+ }
+
+ mtu = rt->u.dst.pmtu - sizeof(struct iphdr);
+ if (mtu < 68) {
+ tunnel->stat.collisions++;
+ ip_rt_put(rt);
+ goto tx_error;
+ }
+ if (skb->dst && mtu < skb->dst->pmtu)
+ skb->dst->pmtu = mtu;
+
+ df |= (old_iph->frag_off&__constant_htons(IP_DF));
+
+ if ((old_iph->frag_off&__constant_htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+ ip_rt_put(rt);
+ goto tx_error;
+ }
+
+ if (tunnel->err_count > 0) {
+ if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
+ tunnel->err_count--;
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
+ } else
+ tunnel->err_count = 0;
}
- skb->h.ipiph = skb->nh.iph;
+ skb->h.raw = skb->nh.raw;
/*
* Okay, now see if we can stuff it in the buffer as-is.
*/
max_headroom = (((tdev->hard_header_len+15)&~15)+sizeof(struct iphdr));
- if (skb_headroom(skb) < max_headroom || skb_shared(skb)) {
+ if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
if (!new_skb) {
ip_rt_put(rt);
stats->tx_dropped++;
dev_kfree_skb(skb, FREE_WRITE);
+ tunnel->recursion--;
return 0;
}
dev_kfree_skb(skb, FREE_WRITE);
skb = new_skb;
+ /* The old buffer is gone: re-read the inner header from the
+    copy, it is used below for the TTL. */
+ old_iph = skb->nh.iph;
}
- skb->nh.iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr));
- iif = ((struct rtable*)skb->dst)->key.iif;
+ skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
dst_release(skb->dst);
skb->dst = &rt->u.dst;
@@ -226,129 +563,210 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct device *dev)
/*
* Push down and install the IPIP header.
*/
-
+
iph = skb->nh.iph;
- memcpy(iph, tiph, sizeof(struct iphdr));
+ iph->version = 4;
+ iph->ihl = sizeof(struct iphdr)>>2;
+ iph->frag_off = df;
+ iph->protocol = IPPROTO_IPIP;
iph->tos = tos;
+ iph->daddr = rt->rt_dst;
+ iph->saddr = rt->rt_src;
+
+ if ((iph->ttl = tiph->ttl) == 0)
+ iph->ttl = old_iph->ttl;
- /* If ttl was specified, we set it, but only
- for locally originated packets.
- It allows to use OSPF/RIPv2 over tunnels, but still
- prevents cathastrophic (checked ;-) tunnel loops.
- */
- if (iif || !tiph->ttl)
- iph->ttl = skb->h.ipiph->ttl;
iph->tot_len = htons(skb->len);
iph->id = htons(ip_id_count++);
ip_send_check(iph);
stats->tx_bytes += skb->len;
+ stats->tx_packets++;
ip_send(skb);
+ tunnel->recursion--;
+ return 0;
- /* Record statistics and return */
- stats->tx_packets++;
+tx_error_icmp:
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
+tx_error:
+ stats->tx_errors++;
+ dev_kfree_skb(skb, FREE_WRITE);
+ tunnel->recursion--;
return 0;
}
+/*
+ * Tunnel configuration ioctl (SIOCGETTUNNEL, SIOCADDTUNNEL,
+ * SIOCCHGTUNNEL, SIOCDELTUNNEL).  When GET or DEL is issued on the
+ * fallback device, the tunnel to act on is selected by the
+ * ip_tunnel_parm passed in by the caller.  Returns 0 or a negative errno.
+ */
static int
ipip_tunnel_ioctl (struct device *dev, struct ifreq *ifr, int cmd)
{
- struct ipip_parms p;
+ int err = 0;
+ struct ip_tunnel_parm p;
+ struct ip_tunnel *t;
+
+ MOD_INC_USE_COUNT;
switch (cmd) {
- case SIOCGTUNNEL:
- memcpy(&p, &((struct ipip_tunnel*)dev->priv)->parms, sizeof(p));
+ case SIOCGETTUNNEL:
+ t = NULL;
+ if (dev == &ipip_fb_tunnel_dev) {
+ /* The lookup key must come from the caller; without
+    this copy "p" would be uninitialised stack. */
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+ err = -EFAULT;
+ break;
+ }
+ t = ipip_tunnel_locate(&p, 0);
+ }
+ if (t == NULL)
+ t = (struct ip_tunnel*)dev->priv;
+ memcpy(&p, &t->parms, sizeof(p));
if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
- return -EFAULT;
- return 0;
+ err = -EFAULT;
+ break;
- case SIOCSTUNNEL:
+ case SIOCADDTUNNEL:
+ case SIOCCHGTUNNEL:
+ err = -EFAULT;
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
- return -EFAULT;
- if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
- p.iph.ihl != 5 || p.iph.frag_off&htons(IP_MF|IP_OFFSET) ||
- !p.iph.saddr || !p.iph.daddr)
- return -EINVAL;
+ goto done;
- start_bh_atomic();
- memcpy(&((struct ipip_tunnel*)dev->priv)->parms, &p, sizeof(p));
- end_bh_atomic();
- return 0;
+ err = -EINVAL;
+ if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
+ p.iph.ihl != 5 || (p.iph.frag_off&__constant_htons(~IP_DF)))
+ goto done;
+ /* A fixed TTL forces DF on the outer header. */
+ if (p.iph.ttl)
+ p.iph.frag_off |= __constant_htons(IP_DF);
+
+ t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
+
+ if (t) {
+ err = 0;
+ if (cmd == SIOCCHGTUNNEL) {
+ t->parms.iph.ttl = p.iph.ttl;
+ t->parms.iph.tos = p.iph.tos;
+ t->parms.iph.frag_off = p.iph.frag_off;
+ }
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
+ err = -EFAULT;
+ } else
+ err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
+ break;
+
+ case SIOCDELTUNNEL:
+ if (dev == &ipip_fb_tunnel_dev) {
+ err = -EFAULT;
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ goto done;
+ err = -ENOENT;
+ if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
+ goto done;
+ err = -EPERM;
+ if (t == &ipip_fb_tunnel)
+ goto done;
+ /* Delete the tunnel that was looked up, not the
+    fallback device the request arrived on. */
+ dev = t->dev;
+ }
+ err = unregister_netdevice(dev);
+ break;
default:
- return -EINVAL;
+ err = -EINVAL;
}
+
+done:
+ MOD_DEC_USE_COUNT;
+ return err;
}
static struct net_device_stats *ipip_tunnel_get_stats(struct device *dev)
{
- return &(((struct ipip_tunnel*)dev->priv)->stat);
+ return &(((struct ip_tunnel*)dev->priv)->stat);
}
-
-void ipip_tunnel_destroy(struct device *dev)
+static int ipip_tunnel_change_mtu(struct device *dev, int new_mtu)
{
- struct device *d, **dp;
-
- for (dp=&ipip_tunnels; (d=*dp) != NULL;
- dp=&((struct ipip_tunnel*)dev->priv)->next) {
- if (d != dev)
- continue;
- *dp = ((struct ipip_tunnel*)dev->priv)->next;
- kfree(dev->priv);
- dev->priv = NULL;
- }
+ if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr)) /* accept only 68 .. 0xFFF8 minus one (outer) IP header */
+ return -EINVAL;
+ dev->mtu = new_mtu; /* value is in range: store it on the tunnel device */
+ return 0;
}
-EXPORT_SYMBOL(ipip_tunnel_init);
-
-int ipip_tunnel_init(struct device *dev)
+static void ipip_tunnel_init_gen(struct device *dev)
{
- struct ipip_tunnel *tunnel;
- struct iphdr *iph;
-
- dev->priv = kmalloc(sizeof(struct ipip_tunnel), GFP_KERNEL);
- if (dev->priv == NULL)
- return -ENOBUFS;
- tunnel = (struct ipip_tunnel*)dev->priv;
- memset(tunnel, 0, sizeof(*tunnel));
- iph = &tunnel->parms.iph;
+ struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
dev->destructor = ipip_tunnel_destroy;
- dev->open = ipip_tunnel_open;
- dev->stop = ipip_tunnel_close;
dev->hard_start_xmit = ipip_tunnel_xmit;
dev->get_stats = ipip_tunnel_get_stats;
dev->do_ioctl = ipip_tunnel_ioctl;
-
- dev_init_buffers(dev);
+ dev->change_mtu = ipip_tunnel_change_mtu;
- dev->hard_header = NULL;
- dev->rebuild_header = NULL;
- dev->set_mac_address = NULL;
- dev->hard_header_cache = NULL;
- dev->header_cache_update= NULL;
+ dev_init_buffers(dev);
dev->type = ARPHRD_TUNNEL;
- dev->hard_header_len = TUNL_HLEN;
- dev->mtu = 1500-sizeof(struct iphdr);
- dev->flags = IFF_NOARP|IFF_POINTOPOINT;
+ dev->hard_header_len = MAX_HEADER + sizeof(struct iphdr);
+ dev->mtu = 1500 - sizeof(struct iphdr);
+ dev->flags = IFF_NOARP;
+ dev->iflink = 0;
+ dev->addr_len = 4;
+ memcpy(dev->dev_addr, &t->parms.iph.saddr, 4);
+ memcpy(dev->broadcast, &t->parms.iph.daddr, 4);
+}
- memset(iph, 0, sizeof(*iph));
- iph->version = 4;
- iph->protocol = IPPROTO_IPIP;
- iph->ihl = 5;
+static int ipip_tunnel_init(struct device *dev)
+{
+ struct device *tdev = NULL; /* underlying device the tunnel would transmit through, if one can be determined */
+ struct ip_tunnel *tunnel;
+ struct iphdr *iph;
+
+ tunnel = (struct ip_tunnel*)dev->priv;
+ iph = &tunnel->parms.iph;
+
+ ipip_tunnel_init_gen(dev); /* generic setup shared with the fallback device */
+
+ if (iph->daddr) { /* a remote endpoint is configured */
+ struct rtable *rt;
+ if (!ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link)) {
+ tdev = rt->u.dst.dev; /* output device of the route towards the remote endpoint */
+ ip_rt_put(rt);
+ }
+ dev->flags |= IFF_POINTOPOINT; /* fixed remote endpoint: mark the link point-to-point */
+ }
+
+ if (!tdev && tunnel->parms.link) /* no route-derived device: fall back to the configured link index */
+ tdev = dev_get_by_index(tunnel->parms.link);
- cli();
- ipip_tunnels=dev;
- tunnel->next=ipip_tunnels;
- sti();
+ if (tdev) { /* size header room and MTU from the underlying device, leaving space for the outer IP header */
+ dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
+ dev->mtu = tdev->mtu - sizeof(struct iphdr);
+ }
+ dev->iflink = tunnel->parms.link;
return 0;
}
+#ifdef MODULE
+static int ipip_fb_tunnel_open(struct device *dev)
+{
+ MOD_INC_USE_COUNT;
+ return 0;
+}
+static int ipip_fb_tunnel_close(struct device *dev)
+{
+ MOD_DEC_USE_COUNT;
+ return 0;
+}
+#endif
+
+__initfunc(int ipip_fb_tunnel_init(struct device *dev))
+{
+ struct iphdr *iph;
+
+ ipip_tunnel_init_gen(dev); /* generic setup shared with ordinary tunnel devices */
#ifdef MODULE
+ dev->open = ipip_fb_tunnel_open;
+ dev->stop = ipip_fb_tunnel_close; /* the shutdown hook is ->stop (as in the code this patch replaces), not ->close */
+#endif
+
+ iph = &ipip_fb_tunnel.parms.iph; /* fixed outer-header fields of the fallback tunnel */
+ iph->version = 4;
+ iph->protocol = IPPROTO_IPIP;
+ iph->ihl = 5;
+
+ tunnels_wc[0] = &ipip_fb_tunnel; /* register the fallback tunnel in the single-slot tunnels_wc table */
+ return 0;
+}
static struct inet_protocol ipip_protocol = {
ipip_rcv, /* IPIP handler */
@@ -360,32 +778,34 @@ static struct inet_protocol ipip_protocol = {
"IPIP" /* name */
};
+#ifdef MODULE
+int init_module(void)
+#else
+__initfunc(int ipip_init(void))
+#endif
+{
+ printk(KERN_INFO "IPv4 over IPv4 tunneling driver\n");
-/*
- * And now the modules code and kernel interface.
- */
+ ipip_fb_tunnel_dev.priv = (void*)&ipip_fb_tunnel;
+ ipip_fb_tunnel_dev.name = ipip_fb_tunnel.parms.name;
+#ifdef MODULE
+ register_netdev(&ipip_fb_tunnel_dev);
+#else
+ register_netdevice(&ipip_fb_tunnel_dev);
+#endif
-int init_module( void)
-{
inet_add_protocol(&ipip_protocol);
return 0;
}
-void cleanup_module( void)
-{
- struct device *dev, *dev_next;
- struct ipip_tunnel *tunnel;
+#ifdef MODULE
+void cleanup_module(void)
+{
if ( inet_del_protocol(&ipip_protocol) < 0 )
printk(KERN_INFO "ipip close: can't remove protocol\n");
- for (dev=ipip_tunnels; dev; dev=dev_next) {
- tunnel=(struct ipip_tunnel*)dev;
- dev_next = tunnel->next;
- unregister_netdevice(dev);
- kfree(dev->priv);
- kfree(dev);
- }
+ unregister_netdevice(&ipip_fb_tunnel_dev);
}
#endif
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index b968fdba8..d70a762e1 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -9,7 +9,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
- * Version: $Id: ipmr.c,v 1.26 1997-10-24 17:16:03 kuznet Exp $
+ * Version: $Id: ipmr.c,v 1.27 1997-10-29 20:27:31 kuznet Exp $
*
* Fixes:
* Michael Chastain : Incorrect size of copying.
@@ -79,74 +79,51 @@ static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtm
extern struct inet_protocol pim_protocol;
-
-#ifdef CONFIG_NET_IPIP
-extern int ipip_tunnel_init(struct device *dev);
-int (*ipip_tunnel_hook)(struct device*) = ipip_tunnel_init;
-#else
-int (*ipip_tunnel_hook)(struct device*);
-#endif
-
static
struct device *ipmr_new_tunnel(struct vifctl *v)
{
- struct device *dev;
- struct in_device *in_dev;
- int size;
- struct iphdr *iph;
- struct rtable *rt;
- int link;
-
- if (ipip_tunnel_hook == NULL)
- return NULL;
-
- if (ip_route_output(&rt, v->vifc_rmt_addr.s_addr,
- v->vifc_lcl_addr.s_addr, 0, 0))
- return NULL;
- link = rt->u.dst.dev->ifindex;
- ip_rt_put(rt);
-
- size = sizeof(*dev)+sizeof(struct ipip_tunnel)+IFNAMSIZ;
- dev = kmalloc(size, GFP_ATOMIC);
- if (!dev)
- return NULL;
-
- memset(dev, 0, size);
-
- dev->priv = dev+1;
- dev->name = ((struct ipip_tunnel*)dev->priv)->name;
- sprintf(dev->name, "dvmrp%d", v->vifc_vifi);
- dev->init = ipip_tunnel_hook;
+ struct device *dev = NULL;
rtnl_lock();
- if (register_netdevice(dev)) {
- rtnl_unlock();
- kfree(dev);
- return NULL;
- }
-
- dev->flags |= IFF_MULTICAST;
-
- ((struct ipip_tunnel*)dev->priv)->parms.link = link;
- iph = &((struct ipip_tunnel*)dev->priv)->parms.iph;
- iph->saddr = v->vifc_lcl_addr.s_addr;
- iph->daddr = v->vifc_rmt_addr.s_addr;
-
- if ((in_dev = inetdev_init(dev)) == NULL)
- goto failure;
- in_dev->flags |= IFF_IP_MFORWARD;
-
- if (dev_open(dev))
- goto failure;
+ dev = dev_get("tunl0"); /* the new tunnel is created through this device's ioctl handler */
+ if (dev) {
+ int err;
+ struct ifreq ifr;
+ unsigned long oldfs;
+ struct ip_tunnel_parm p;
+ struct in_device *in_dev;
+
+ memset(&p, 0, sizeof(p));
+ p.iph.daddr = v->vifc_rmt_addr.s_addr;
+ p.iph.saddr = v->vifc_lcl_addr.s_addr;
+ p.iph.version = 4;
+ p.iph.ihl = 5;
+ p.iph.protocol = IPPROTO_IPIP;
+ sprintf(p.name, "dvmrp%d", v->vifc_vifi);
+ ifr.ifr_ifru.ifru_data = (void*)&p;
+
+ oldfs = get_fs(); set_fs(KERNEL_DS); /* p is a kernel buffer handed to an ioctl that copies from "user" space */
+ err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
+ set_fs(oldfs);
+ dev = NULL; /* from here on dev means the NEW tunnel only; never return tunl0 itself on failure */
+
+ if (err == 0 && (dev = dev_get(p.name)) != NULL) {
+ dev->flags |= IFF_MULTICAST;
+
+ in_dev = dev->ip_ptr;
+ if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
+ goto failure;
+
+ if (dev_open(dev))
+ goto failure;
+ }
+ }
rtnl_unlock();
- dev_set_allmulti(dev, +1);
return dev;
failure:
unregister_netdevice(dev);
rtnl_unlock();
- kfree(dev);
return NULL;
}
@@ -162,6 +139,11 @@ static int reg_vif_xmit(struct sk_buff *skb, struct device *dev)
return 0;
}
+static struct net_device_stats *reg_vif_get_stats(struct device *dev)
+{
+ return (struct net_device_stats*)dev->priv; /* ipmr_reg_vif() sets priv to the area allocated right after the device struct, sized to hold the stats */
+}
+
static
struct device *ipmr_reg_vif(struct vifctl *v)
{
@@ -169,7 +151,7 @@ struct device *ipmr_reg_vif(struct vifctl *v)
struct in_device *in_dev;
int size;
- size = sizeof(*dev) + IFNAMSIZ;
+ size = sizeof(*dev) + IFNAMSIZ + sizeof(struct net_device_stats);
dev = kmalloc(size, GFP_KERNEL);
if (!dev)
return NULL;
@@ -177,13 +159,15 @@ struct device *ipmr_reg_vif(struct vifctl *v)
memset(dev, 0, size);
dev->priv = dev + 1;
- dev->name = dev->priv;
+ dev->name = dev->priv + sizeof(struct net_device_stats);
+
strcpy(dev->name, "pimreg");
- dev->type = ARPHRD_TUNNEL;
- dev->mtu = 0xFFFF;
- dev->flags = IFF_NOARP|IFF_MULTICAST;
+ dev->type = ARPHRD_PIMREG;
+ dev->mtu = 1500 - sizeof(struct iphdr) - 8;
+ dev->flags = IFF_NOARP;
dev->hard_start_xmit = reg_vif_xmit;
+ dev->get_stats = reg_vif_get_stats;
rtnl_lock();
@@ -195,13 +179,11 @@ struct device *ipmr_reg_vif(struct vifctl *v)
if ((in_dev = inetdev_init(dev)) == NULL)
goto failure;
- in_dev->flags |= IFF_IP_MFORWARD;
if (dev_open(dev))
goto failure;
rtnl_unlock();
- dev_set_allmulti(dev, +1);
reg_dev = dev;
return dev;
@@ -232,12 +214,11 @@ static int vif_delete(int vifi)
v->dev = NULL;
vifc_map &= ~(1<<vifi);
- in_dev = dev->ip_ptr;
- if (in_dev)
+ if ((in_dev = dev->ip_ptr) != NULL)
in_dev->flags &= ~IFF_IP_MFORWARD;
dev_set_allmulti(dev, -1);
- ip_rt_multicast_event(dev->ip_ptr);
+ ip_rt_multicast_event(in_dev);
if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER)) {
#ifdef CONFIG_IP_PIMSM
@@ -247,7 +228,8 @@ static int vif_delete(int vifi)
}
#endif
unregister_netdevice(dev);
- kfree(dev);
+ if (v->flags&VIFF_REGISTER)
+ kfree(dev);
}
if (vifi+1 == maxvif) {
@@ -261,21 +243,27 @@ static int vif_delete(int vifi)
return 0;
}
-static void ipmr_set_bounds(struct mfc_cache *cache)
+static void ipmr_update_threshoulds(struct mfc_cache *cache, unsigned char *ttls)
{
int vifi;
+
+ start_bh_atomic();
+
+ cache->mfc_minvif = MAXVIFS; /* start from an empty [minvif, maxvif) range */
+ cache->mfc_maxvif = 0;
+ memset(cache->mfc_ttls, 255, MAXVIFS); /* 255 = do not forward on this vif */
+
for (vifi=0; vifi<maxvif; vifi++) {
- if (vifc_map&(1<<vifi) && cache->mfc_ttls[vifi]) {
- cache->mfc_minvif = vifi;
- cache->mfc_maxvif = vifi+1;
+ if (vifc_map&(1<<vifi) && ttls[vifi] && ttls[vifi] < 255) {
+ cache->mfc_ttls[vifi] = ttls[vifi];
+ if (cache->mfc_minvif > vifi)
+ cache->mfc_minvif = vifi;
+ if (cache->mfc_maxvif <= vifi)
+ cache->mfc_maxvif = vifi + 1;
- vifi++;
- break;
+ /* vifi is advanced only by the for-loop header, so no vif is skipped after a match */
}
}
- for ( ; vifi<maxvif; vifi++) {
- if (vifc_map&(1<<vifi) && cache->mfc_ttls[vifi])
- cache->mfc_maxvif = vifi+1;
- }
+ end_bh_atomic();
}
/*
@@ -626,8 +614,7 @@ int ipmr_mfc_modify(int action, struct mfcctl *mfc)
cache->mfc_flags|=MFC_RESOLVED;
cache->mfc_parent=mfc->mfcc_parent;
- memcpy(cache->mfc_ttls, mfc->mfcc_ttls,sizeof(cache->mfc_ttls));
- ipmr_set_bounds(cache);
+ ipmr_update_threshoulds(cache, mfc->mfcc_ttls);
/*
* Check to see if we resolved a queued list. If so we
@@ -655,8 +642,7 @@ int ipmr_mfc_modify(int action, struct mfcctl *mfc)
cache->mfc_origin=mfc->mfcc_origin.s_addr;
cache->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
cache->mfc_parent=mfc->mfcc_parent;
- memcpy(cache->mfc_ttls, mfc->mfcc_ttls,sizeof(cache->mfc_ttls));
- ipmr_set_bounds(cache);
+ ipmr_update_threshoulds(cache, mfc->mfcc_ttls);
ipmr_cache_insert(cache);
end_bh_atomic();
return 0;
@@ -666,8 +652,8 @@ static void mrtsock_destruct(struct sock *sk)
{
if (sk == mroute_socket) {
ipv4_config.multicast_route = 0;
- mroute_close(sk);
mroute_socket=NULL;
+ mroute_close(sk);
}
}
@@ -680,7 +666,6 @@ static void mrtsock_destruct(struct sock *sk)
int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen)
{
- int err;
struct vifctl vif;
struct mfcctl mfc;
@@ -699,9 +684,8 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen)
return -ENOPROTOOPT;
{
int opt;
- err = get_user(opt,(int *)optval);
- if (err)
- return err;
+ if (get_user(opt,(int *)optval))
+ return -EFAULT;
if (opt != 1)
return -ENOPROTOOPT;
}
@@ -720,15 +704,15 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen)
case MRT_DEL_VIF:
if(optlen!=sizeof(vif))
return -EINVAL;
- err = copy_from_user(&vif,optval,sizeof(vif));
- if (err)
+ if (copy_from_user(&vif,optval,sizeof(vif)))
return -EFAULT;
- if(vif.vifc_vifi > MAXVIFS)
+ if(vif.vifc_vifi >= MAXVIFS)
return -ENFILE;
if(optname==MRT_ADD_VIF)
{
struct vif_device *v=&vif_table[vif.vifc_vifi];
struct device *dev;
+ struct in_device *in_dev;
/* Is vif busy ? */
if (vifc_map&(1<<vif.vifc_vifi))
@@ -759,19 +743,22 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen)
break;
case 0:
dev=ip_dev_find(vif.vifc_lcl_addr.s_addr);
- if (!dev || !dev->ip_ptr)
+ if (!dev)
return -EADDRNOTAVAIL;
- if (((struct in_device*)dev->ip_ptr)->flags & IFF_IP_MFORWARD)
- return -EADDRINUSE;
- ((struct in_device*)dev->ip_ptr)->flags |= IFF_IP_MFORWARD;
- dev_set_allmulti(dev, +1);
- ip_rt_multicast_event(dev->ip_ptr);
break;
default:
printk(KERN_DEBUG "ipmr_add_vif: flags %02x\n", vif.vifc_flags);
return -EINVAL;
}
+ if ((in_dev = dev->ip_ptr) == NULL)
+ return -EADDRNOTAVAIL;
+ if (in_dev->flags & IFF_IP_MFORWARD)
+ return -EADDRINUSE;
+ in_dev->flags |= IFF_IP_MFORWARD;
+ dev_set_allmulti(dev, +1);
+ ip_rt_multicast_event(in_dev);
+
/*
* Fill in the VIF structures
*/
@@ -787,7 +774,7 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen)
v->pkt_in = 0;
v->pkt_out = 0;
v->link = dev->ifindex;
- if (vif.vifc_flags&VIFF_TUNNEL)
+ if (vif.vifc_flags&(VIFF_TUNNEL|VIFF_REGISTER))
v->link = dev->iflink;
vifc_map|=(1<<vif.vifc_vifi);
if (vif.vifc_vifi+1 > maxvif)
@@ -810,8 +797,9 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen)
case MRT_DEL_MFC:
if(optlen!=sizeof(mfc))
return -EINVAL;
- err = copy_from_user(&mfc,optval, sizeof(mfc));
- return err ? -EFAULT : ipmr_mfc_modify(optname, &mfc);
+ if (copy_from_user(&mfc,optval, sizeof(mfc)))
+ return -EFAULT;
+ return ipmr_mfc_modify(optname, &mfc);
/*
* Control PIM assert.
*/
@@ -895,7 +883,6 @@ int ip_mroute_getsockopt(struct sock *sk,int optname,char *optval,int *optlen)
int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
- int err;
struct sioc_sg_req sr;
struct sioc_vif_req vr;
struct vif_device *vif;
@@ -904,8 +891,7 @@ int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg)
switch(cmd)
{
case SIOCGETVIFCNT:
- err = copy_from_user(&vr,(void *)arg,sizeof(vr));
- if (err)
+ if (copy_from_user(&vr,(void *)arg,sizeof(vr)))
return -EFAULT;
if(vr.vifi>=maxvif)
return -EINVAL;
@@ -916,16 +902,13 @@ int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg)
vr.ocount=vif->pkt_out;
vr.ibytes=vif->bytes_in;
vr.obytes=vif->bytes_out;
- err = copy_to_user((void *)arg,&vr,sizeof(vr));
- if (err)
- err = -EFAULT;
- return err;
+ if (copy_to_user((void *)arg,&vr,sizeof(vr)))
+ return -EFAULT;
return 0;
}
return -EADDRNOTAVAIL;
case SIOCGETSGCNT:
- err = copy_from_user(&sr,(void *)arg,sizeof(sr));
- if (err)
+ if (copy_from_user(&sr,(void *)arg,sizeof(sr)))
return -EFAULT;
for (c = mfc_cache_array[MFC_HASH(sr.grp.s_addr, sr.src.s_addr)];
c; c = c->next) {
@@ -934,10 +917,8 @@ int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg)
sr.pktcnt = c->mfc_pkt;
sr.bytecnt = c->mfc_bytes;
sr.wrong_if = c->mfc_wrong_if;
- err = copy_to_user((void *)arg,&sr,sizeof(sr));
- if (err)
- err = -EFAULT;
- return err;
+ if (copy_to_user((void *)arg,&sr,sizeof(sr)))
+ return -EFAULT;
return 0;
}
}
@@ -983,11 +964,9 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
return NOTIFY_DONE;
v=&vif_table[0];
for(ct=0;ct<maxvif;ct++) {
- if (!(v->flags&(VIFF_TUNNEL|VIFF_REGISTER))) {
- if (vifc_map&(1<<ct) && v->dev==ptr)
- vif_delete(ct);
- v++;
- }
+ if (vifc_map&(1<<ct) && v->dev==ptr)
+ vif_delete(ct);
+ v++;
}
return NOTIFY_DONE;
}
@@ -1038,25 +1017,24 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c,
struct rtable *rt;
int encap = 0;
struct sk_buff *skb2;
- int err;
#ifdef CONFIG_IP_PIMSM
if (vif->flags & VIFF_REGISTER) {
vif->pkt_out++;
vif->bytes_out+=skb->len;
+ ((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len;
+ ((struct net_device_stats*)vif->dev->priv)->tx_packets++;
ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
return;
}
#endif
if (vif->flags&VIFF_TUNNEL) {
- err = ip_route_output(&rt, vif->remote, vif->local, RT_TOS(iph->tos), vif->link);
- if (err)
+ if (ip_route_output(&rt, vif->remote, vif->local, RT_TOS(iph->tos), vif->link))
return;
encap = sizeof(struct iphdr);
} else {
- err = ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos), vif->link);
- if (err)
+ if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos), vif->link))
return;
}
@@ -1100,8 +1078,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c,
if (vif->flags & VIFF_TUNNEL) {
ip_encap(skb2, vif->local, vif->remote);
- ((struct ipip_tunnel *)vif->dev->priv)->stat.tx_packets++;
- ((struct ipip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb2->len;
+ ((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++;
+ ((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb2->len;
}
IPCB(skb2)->flags |= IPSKB_FORWARDED;
@@ -1145,7 +1123,7 @@ int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
/*
* Wrong interface: drop packet and (maybe) send PIM assert.
*/
- if (vif >= maxvif || !(vifc_map&(1<<vif)) || vif_table[vif].dev != skb->dev) {
+ if (vif_table[vif].dev != skb->dev) {
int true_vifi;
if (((struct rtable*)skb->dst)->key.iif == 0) {
@@ -1166,13 +1144,13 @@ int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
cache->mfc_wrong_if++;
true_vifi = ipmr_find_vif(skb->dev);
- if (vif < MAXVIFS && true_vifi < MAXVIFS && mroute_do_assert &&
+ if (true_vifi < MAXVIFS && mroute_do_assert &&
/* pimsm uses asserts, when switching from RPT to SPT,
so that we cannot check that packet arrived on an oif.
It is bad, but otherwise we would need to move pretty
large chunk of pimd to kernel. Ough... --ANK
*/
- (mroute_do_pim || cache->mfc_ttls[true_vifi]) &&
+ (mroute_do_pim || cache->mfc_ttls[true_vifi] < 255) &&
jiffies - cache->mfc_last_assert > MFC_ASSERT_THRESH) {
cache->mfc_last_assert = jiffies;
ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
@@ -1186,17 +1164,12 @@ int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
/*
* Forward the frame
*/
- ct = cache->mfc_maxvif-1;
- while (ct>=cache->mfc_minvif) {
- /*
- * 0 means don't do it. Silly idea, 255 as don't do it would be cleaner!
- */
- if (skb->nh.iph->ttl > cache->mfc_ttls[ct] && cache->mfc_ttls[ct]>0) {
+ for (ct = cache->mfc_maxvif-1; ct >= cache->mfc_minvif; ct--) {
+ if (skb->nh.iph->ttl > cache->mfc_ttls[ct]) {
if (psend != -1)
ipmr_queue_xmit(skb, cache, psend, 0);
psend=ct;
}
- ct--;
}
if (psend != -1)
ipmr_queue_xmit(skb, cache, psend, !local);
@@ -1318,7 +1291,8 @@ int pim_rcv_v1(struct sk_buff * skb, unsigned short len)
skb->pkt_type = PACKET_HOST;
dst_release(skb->dst);
skb->dst = NULL;
-
+ ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
+ ((struct net_device_stats*)reg_dev->priv)->rx_packets++;
netif_rx(skb);
return 0;
}
@@ -1332,10 +1306,9 @@ int pim_rcv(struct sk_buff * skb, unsigned short len)
if (len < sizeof(*pim) + sizeof(*encap) ||
pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
- pim->addrlen != 4 ||
(pim->flags&PIM_NULL_REGISTER) ||
reg_dev == NULL ||
- ip_compute_csum((void *)pim, sizeof(*pim))) {
+ ip_compute_csum((void *)pim, len)) {
kfree_skb(skb, FREE_READ);
return -EINVAL;
}
@@ -1356,6 +1329,8 @@ int pim_rcv(struct sk_buff * skb, unsigned short len)
skb->ip_summed = 0;
skb->pkt_type = PACKET_HOST;
dst_release(skb->dst);
+ ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
+ ((struct net_device_stats*)reg_dev->priv)->rx_packets++;
skb->dst = NULL;
netif_rx(skb);
return 0;
@@ -1377,15 +1352,15 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
rtm->rtm_optlen += skb->tail - o;
}
- for (ct = c->mfc_minvif; ct <= c->mfc_maxvif; ct++) {
- if (c->mfc_ttls[ct]) {
+ for (ct = c->mfc_minvif; ct < c->mfc_maxvif; ct++) {
+ if (c->mfc_ttls[ct] < 255) {
if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
goto rtattr_failure;
nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
nhp->rtnh_flags = 0;
nhp->rtnh_hops = c->mfc_ttls[ct];
nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
- nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
+ nhp->rtnh_len = sizeof(*nhp);
rtm->rtm_nhs++;
}
}
@@ -1457,7 +1432,7 @@ int ipmr_vif_info(char *buffer, char **start, off_t offset, int length, int dumm
continue;
if (vif->dev)
name = vif->dev->name;
- size = sprintf(buffer+len, "%2d %-10s %8ld %7ld %8ld %7ld %05X %08lX %08lX\n",
+ size = sprintf(buffer+len, "%2d %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
ct, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out,
vif->flags, vif->local, vif->remote);
len+=size;
@@ -1509,9 +1484,9 @@ int ipmr_mfc_info(char *buffer, char **start, off_t offset, int length, int dumm
(mfc->mfc_flags & MFC_QUEUED) ? mfc->mfc_unresolved.qlen : mfc->mfc_pkt,
mfc->mfc_bytes,
mfc->mfc_wrong_if);
- for(n=mfc->mfc_minvif;n<=mfc->mfc_maxvif;n++)
+ for(n=mfc->mfc_minvif;n<mfc->mfc_maxvif;n++)
{
- if(vifc_map&(1<<n) && mfc->mfc_ttls[n])
+ if(vifc_map&(1<<n) && mfc->mfc_ttls[n] < 255)
size += sprintf(buffer+len+size, " %2d:%-3d", n, mfc->mfc_ttls[n]);
}
size += sprintf(buffer+len+size, "\n");
@@ -1536,6 +1511,10 @@ done:
len-=(offset-begin);
if(len>length)
len=length;
+ if (len < 0) {
+ len = 0;
+ printk(KERN_CRIT "Yep, guys... our template for proc_*_read is crappy :-)\n");
+ }
return len;
}
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 64effeb5d..638d0418f 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -5,7 +5,7 @@
*
* INET protocol dispatch tables.
*
- * Version: $Id: protocol.c,v 1.8 1997-10-10 22:41:19 davem Exp $
+ * Version: $Id: protocol.c,v 1.9 1997-10-29 20:27:34 kuznet Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -47,24 +47,6 @@
#define IPPROTO_PREVIOUS NULL
-#ifdef CONFIG_NET_IPIP
-
-static struct inet_protocol ipip_protocol =
-{
- ipip_rcv, /* IPIP handler */
- ipip_err, /* TUNNEL error control */
- IPPROTO_PREVIOUS, /* next */
- IPPROTO_IPIP, /* protocol ID */
- 0, /* copy */
- NULL, /* data */
- "IPIP" /* name */
-};
-
-#undef IPPROTO_PREVIOUS
-#define IPPROTO_PREVIOUS &ipip_protocol
-
-#endif
-
#ifdef CONFIG_IP_MULTICAST
static struct inet_protocol igmp_protocol =
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 5aa0b4a98..c92fd59a0 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5,7 +5,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: addrconf.c,v 1.25 1997-10-12 17:01:51 kuznet Exp $
+ * $Id: addrconf.c,v 1.26 1997-10-29 20:27:44 kuznet Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -43,7 +43,8 @@
#include <net/ndisc.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
-#include <net/sit.h>
+#include <net/ip.h>
+#include <linux/if_tunnel.h>
#include <asm/uaccess.h>
@@ -364,7 +365,7 @@ struct inet6_ifaddr * ipv6_get_saddr(struct dst_entry *dst,
}
out:
- if (ifp == NULL && match)
+ if (ifp == NULL)
ifp = match;
atomic_dec(&addr_list_lock);
return ifp;
@@ -715,17 +716,32 @@ int addrconf_set_dstaddr(void *arg)
}
if (dev->type == ARPHRD_SIT) {
- struct device *dev;
-
+ struct ifreq ifr;
+ unsigned long oldfs;
+ struct ip_tunnel_parm p;
+
if (!(ipv6_addr_type(&ireq.ifr6_addr) & IPV6_ADDR_COMPATv4))
return -EADDRNOTAVAIL;
-
- dev = sit_add_tunnel(ireq.ifr6_addr.s6_addr32[3]);
- if (dev == NULL)
- err = -ENODEV;
- else
- err = 0;
+ memset(&p, 0, sizeof(p));
+ p.iph.daddr = ireq.ifr6_addr.s6_addr32[3];
+ p.iph.saddr = 0;
+ p.iph.version = 4;
+ p.iph.ihl = 5;
+ p.iph.protocol = IPPROTO_IPV6;
+ p.iph.ttl = 64;
+ ifr.ifr_ifru.ifru_data = (void*)&p;
+
+ oldfs = get_fs(); set_fs(KERNEL_DS);
+ err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
+ set_fs(oldfs);
+
+ if (err == 0) {
+ err = -ENOBUFS;
+ if ((dev = dev_get(p.name)) == NULL)
+ goto err_exit;
+ err = dev_open(dev);
+ }
}
err_exit:
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 788acb81b..e9178a5eb 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -7,7 +7,7 @@
*
* Adapted from linux/net/ipv4/af_inet.c
*
- * $Id: af_inet6.c,v 1.22 1997-09-14 08:32:05 davem Exp $
+ * $Id: af_inet6.c,v 1.23 1997-10-29 20:27:52 kuznet Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -44,7 +44,7 @@
#include <net/ipv6.h>
#include <net/udp.h>
#include <net/tcp.h>
-#include <net/sit.h>
+#include <net/ipip.h>
#include <net/protocol.h>
#include <net/inet_common.h>
#include <net/transp_v6.h>
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index cddd652a4..20befbc6a 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -7,7 +7,7 @@
*
* Based on linux/net/ipv4/ip_sockglue.c
*
- * $Id: ipv6_sockglue.c,v 1.14 1997-09-14 08:32:10 davem Exp $
+ * $Id: ipv6_sockglue.c,v 1.15 1997-10-29 20:27:54 kuznet Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -43,7 +43,6 @@
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/inet_common.h>
-#include <net/sit.h>
#include <net/tcp.h>
#include <net/udp.h>
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index d741e215a..02ccf19c3 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -5,7 +5,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: mcast.c,v 1.10 1997-05-07 09:40:22 davem Exp $
+ * $Id: mcast.c,v 1.11 1997-10-29 20:27:50 kuznet Exp $
*
* Based on linux/ipv4/igmp.c and linux/ipv4/ip_sockglue.c
*
@@ -417,7 +417,10 @@ void igmp6_send(struct in6_addr *addr, struct device *dev, int type)
skb_reserve(skb, (dev->hard_header_len + 15) & ~15);
if (dev->hard_header) {
unsigned char ha[MAX_ADDR_LEN];
- ipv6_mc_map(addr, ha);
+ if (dev->type == ARPHRD_ETHER)
+ ipv6_mc_map(addr, ha);
+ else
+ memcpy(ha, dev->broadcast, dev->addr_len);
dev->hard_header(skb, dev, ETH_P_IPV6, ha, NULL, plen);
skb->arp = 1;
}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 100d83928..901f5109b 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -394,7 +394,10 @@ int ndisc_eth_resolv(unsigned char *h_dest, struct sk_buff *skb)
struct in6_addr *daddr;
daddr = &skb->nh.ipv6h->daddr;
- ipv6_mc_map(daddr, h_dest);
+ if (skb->dev->type == ARPHRD_ETHER)
+ ipv6_mc_map(daddr, h_dest);
+ else
+ memcpy(h_dest, skb->dev->broadcast, skb->dev->addr_len);
return 0;
}
@@ -446,7 +449,10 @@ ndisc_build_ll_hdr(struct sk_buff *skb, struct device *dev,
if (dev->hard_header) {
if (ipv6_addr_type(daddr) & IPV6_ADDR_MULTICAST) {
nd_stats.snt_probes_mcast++;
- ipv6_mc_map(daddr, ha);
+ if (dev->type == ARPHRD_ETHER)
+ ipv6_mc_map(daddr, ha);
+ else
+ memcpy(ha, dev->broadcast, dev->addr_len);
h_dest = ha;
} else if (neigh) {
h_dest = neigh->ha;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 827912428..a096833ca 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -4,8 +4,9 @@
*
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
+ * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
*
- * $Id: sit.c,v 1.18 1997-09-18 14:12:25 freitag Exp $
+ * $Id: sit.c,v 1.19 1997-10-29 20:27:41 kuznet Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -13,6 +14,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#include <linux/config.h>
+#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
@@ -23,6 +26,7 @@
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/icmp.h>
+#include <asm/uaccess.h>
#include <linux/init.h>
#include <net/sock.h>
@@ -31,467 +35,698 @@
#include <net/ipv6.h>
#include <net/protocol.h>
#include <net/transp_v6.h>
+#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/ip.h>
#include <net/udp.h>
-#include <net/sit.h>
+#include <net/icmp.h>
+#include <net/ipip.h>
+/*
+ This version of net/ipv6/sit.c is cloned from net/ipv4/ip_gre.c
-static int sit_init_dev(struct device *dev);
-
-static struct device sit_device = {
- "sit0",
- 0, 0, 0, 0,
- 0x0, 0,
- 0, 0, 0, NULL, sit_init_dev
-};
-
-static unsigned long sit_gc_last_run;
-static void sit_mtu_cache_gc(void);
-
-static int sit_xmit(struct sk_buff *skb,
- struct device *dev);
-static int sit_rcv(struct sk_buff *skb, unsigned short len);
-static void sit_err(struct sk_buff *skb, unsigned char *dp, int len);
+ For comments look at net/ipv4/ip_gre.c --ANK
+ */
-static int sit_open(struct device *dev);
-static int sit_close(struct device *dev);
+#define HASH_SIZE 16
+#define HASH(addr) ((addr^(addr>>4))&0xF)
-static struct net_device_stats *sit_get_stats(struct device *dev);
+static int ipip6_fb_tunnel_init(struct device *dev);
+static int ipip6_tunnel_init(struct device *dev);
-static struct inet_protocol sit_protocol = {
- sit_rcv,
- sit_err,
- 0,
- IPPROTO_IPV6,
- 0,
- NULL,
- "IPv6"
+/* Fallback tunnel device.  sit_init() points its name and priv at
+   ipip6_fb_tunnel before registering it; the last initializer is
+   ipip6_fb_tunnel_init (presumably the device init hook -- confirm against
+   struct device). */
+static struct device ipip6_fb_tunnel_dev = {
+ NULL, 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, ipip6_fb_tunnel_init,
};
-#define SIT_NUM_BUCKETS 16
-
-struct sit_mtu_info *sit_mtu_cache[SIT_NUM_BUCKETS];
+/* Tunnel state of the fallback device; initialised with the name "sit0"
+   (presumably parms.name, which sit_init() uses as the device name). */
+static struct ip_tunnel ipip6_fb_tunnel = {
+ NULL, &ipip6_fb_tunnel_dev, {0, }, 0, 0, 0, 0, 0, 0, 0, {"sit0", }
+};
-static int vif_num = 0;
-static struct sit_vif *vif_list = NULL;
+/* Hash chains of tunnels, split by which endpoint addresses are configured:
+   _r_l = remote and local, _r = remote only, _l = local only,
+   _wc = neither (a single slot, set to the fallback tunnel by
+   ipip6_fb_tunnel_init).  tunnels[] is indexed by the "prio" value built in
+   ipip6_tunnel_locate(): 2 is or-ed in when remote != 0, 1 when local != 0. */
+static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
+static struct ip_tunnel *tunnels_r[HASH_SIZE];
+static struct ip_tunnel *tunnels_l[HASH_SIZE];
+static struct ip_tunnel *tunnels_wc[1];
+static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
-static __inline__ __u32 sit_addr_hash(__u32 addr)
+/* Find the tunnel for the given remote (peer) and local IPv4 addresses.
+   Candidates are tried from most to least specific: remote+local, remote
+   only, local only, then the single wildcard slot.  A tunnel is returned
+   only if its device has IFF_UP set; NULL means no usable tunnel. */
+static struct ip_tunnel * ipip6_tunnel_lookup(u32 remote, u32 local)
{
-
- __u32 hash_val;
-
- hash_val = addr;
-
- hash_val ^= hash_val >> 16;
- hash_val ^= hash_val >> 8;
-
- return (hash_val & (SIT_NUM_BUCKETS - 1));
+ unsigned h0 = HASH(remote);
+ unsigned h1 = HASH(local);
+ struct ip_tunnel *t;
+
+ /* Both endpoints configured: bucket is the xor of the two hashes. */
+ for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
+ if (local == t->parms.iph.saddr &&
+ remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
+ return t;
+ }
+ /* Remote-only chain. */
+ for (t = tunnels_r[h0]; t; t = t->next) {
+ if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
+ return t;
+ }
+ /* Local-only chain. */
+ for (t = tunnels_l[h1]; t; t = t->next) {
+ if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
+ return t;
+ }
+ /* Last resort: the wildcard tunnel, if present and up. */
+ if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
+ return t;
+ return NULL;
}
-static void sit_cache_insert(__u32 addr, int mtu)
+/* Find the tunnel whose configured saddr/daddr equal those in parms->iph.
+   If none exists and create is non-zero, allocate a device with the tunnel
+   state placed directly behind it, register it and append it to the matching
+   hash chain.  When parms->name is empty a free name "sit1".."sit99" is
+   picked and copied back into parms->name.
+   Returns the tunnel, or NULL when nothing was found (create == 0) or when
+   allocation, name selection or register_netdevice() failed. */
+struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int create)
{
- struct sit_mtu_info *minfo;
- int hash;
-
- minfo = kmalloc(sizeof(struct sit_mtu_info), GFP_ATOMIC);
-
- if (minfo == NULL)
- return;
-
- minfo->addr = addr;
- minfo->tstamp = jiffies;
- minfo->mtu = mtu;
+ u32 remote = parms->iph.daddr;
+ u32 local = parms->iph.saddr;
+ struct ip_tunnel *t, **tp, *nt;
+ struct device *dev;
+ unsigned h = 0;
+ int prio = 0;
- hash = sit_addr_hash(addr);
+ /* prio selects the table in tunnels[], h the bucket within it. */
+ if (remote) {
+ prio |= 2;
+ h ^= HASH(remote);
+ }
+ if (local) {
+ prio |= 1;
+ h ^= HASH(local);
+ }
+ /* On a miss the loop leaves t == NULL and tp at the chain's tail link. */
+ for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
+ if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
+ return t;
+ }
+ if (!create)
+ return NULL;
- minfo->next = sit_mtu_cache[hash];
- sit_mtu_cache[hash] = minfo;
+ MOD_INC_USE_COUNT;
+ /* One allocation holds the device followed by its ip_tunnel. */
+ dev = kmalloc(sizeof(*dev) + sizeof(*t), GFP_KERNEL);
+ if (dev == NULL) {
+ MOD_DEC_USE_COUNT;
+ return NULL;
+ }
+ memset(dev, 0, sizeof(*dev) + sizeof(*t));
+ dev->priv = (void*)(dev+1);
+ nt = (struct ip_tunnel*)dev->priv;
+ nt->dev = dev;
+ dev->name = nt->parms.name;
+ dev->init = ipip6_tunnel_init;
+ memcpy(&nt->parms, parms, sizeof(*parms));
+ if (dev->name[0] == 0) {
+ int i;
+ /* No name supplied: take the first sitN that dev_get() does not know. */
+ for (i=1; i<100; i++) {
+ sprintf(dev->name, "sit%d", i);
+ if (dev_get(dev->name) == NULL)
+ break;
+ }
+ if (i==100)
+ goto failed;
+ memcpy(parms->name, dev->name, IFNAMSIZ);
+ }
+ if (register_netdevice(dev) < 0)
+ goto failed;
+
+ /* Link the new tunnel at the tail found above, with bottom halves held off. */
+ start_bh_atomic();
+ nt->next = t;
+ *tp = nt;
+ end_bh_atomic();
+ /* Do not decrement MOD_USE_COUNT here. */
+ return nt;
+
+failed:
+ kfree(dev);
+ MOD_DEC_USE_COUNT;
+ return NULL;
}
-static struct sit_mtu_info * sit_mtu_lookup(__u32 addr)
+/* Device destructor (installed by ipip6_tunnel_init_gen).  For the fallback
+   device only the wildcard slot is cleared.  Any other tunnel is unlinked
+   from its hash chain, its combined device/tunnel allocation is freed and
+   the module use count taken in ipip6_tunnel_locate() is dropped. */
+static void ipip6_tunnel_destroy(struct device *dev)
{
- struct sit_mtu_info *iter;
- int hash;
-
- hash = sit_addr_hash(addr);
-
- for(iter = sit_mtu_cache[hash]; iter; iter=iter->next) {
- if (iter->addr == addr) {
- iter->tstamp = jiffies;
- break;
- }
+ struct ip_tunnel *t, **tp;
+ struct ip_tunnel *t0 = (struct ip_tunnel*)dev->priv;
+ u32 remote = t0->parms.iph.daddr;
+ u32 local = t0->parms.iph.saddr;
+ unsigned h = 0;
+ int prio = 0;
+
+ /* The fallback device is static storage: nothing to free. */
+ if (dev == &ipip6_fb_tunnel_dev) {
+ tunnels_wc[0] = NULL;
+ return;
}
- /*
- * run garbage collector
- */
-
- if (jiffies - sit_gc_last_run > SIT_GC_FREQUENCY) {
- sit_mtu_cache_gc();
- sit_gc_last_run = jiffies;
+ /* Recompute table and bucket exactly as ipip6_tunnel_locate() does. */
+ if (remote) {
+ prio |= 2;
+ h ^= HASH(remote);
}
-
- return iter;
-}
-
-static void sit_mtu_cache_gc(void)
-{
- struct sit_mtu_info *iter, *back;
- unsigned long now = jiffies;
- int i;
-
- for (i=0; i < SIT_NUM_BUCKETS; i++) {
- back = NULL;
- for (iter = sit_mtu_cache[i]; iter;) {
- if (now - iter->tstamp > SIT_GC_TIMEOUT) {
- struct sit_mtu_info *old;
-
- old = iter;
- iter = iter->next;
-
- if (back)
- back->next = iter;
- else
- sit_mtu_cache[i] = iter;
-
- kfree(old);
- continue;
- }
- back = iter;
- iter = iter->next;
+ if (local) {
+ prio |= 1;
+ h ^= HASH(local);
+ }
+ for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
+ if (t == t0) {
+ *tp = t->next;
+ /* dev and the tunnel share one allocation, so this frees both. */
+ kfree(dev);
+ MOD_DEC_USE_COUNT;
+ break;
}
}
}
-static int sit_init_dev(struct device *dev)
-{
- dev->open = sit_open;
- dev->stop = sit_close;
- dev->hard_start_xmit = sit_xmit;
- dev->get_stats = sit_get_stats;
+/* ICMP error handler (second entry of sit_protocol).  dp is read as the
+   IPv4 header quoted in the ICMP message and len as the number of bytes
+   available there (presumably supplied by the ICMP layer -- confirm).
+   In the default build (I_WISH_WORLD_WERE_PERFECT undefined) nothing is
+   relayed: qualifying errors only update the tunnel's err_count/err_time,
+   which ipip6_tunnel_xmit() later turns into ICMPv6 errors.  The #else
+   branch instead translates the error and sends it via icmpv6_send(). */
+void ipip6_err(struct sk_buff *skb, unsigned char *dp, int len)
+{
+#ifndef I_WISH_WORLD_WERE_PERFECT
- dev->priv = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL);
+/* It is not :-( All the routers (except for Linux) return only
+ 8 bytes of packet payload. It means, that precise relaying of
+ ICMP in the real Internet is absolutely infeasible.
+ */
+ struct iphdr *iph = (struct iphdr*)dp;
+ int type = skb->h.icmph->type;
+ int code = skb->h.icmph->code;
+ struct ip_tunnel *t;
- if (dev->priv == NULL)
- return -ENOMEM;
+ if (len < sizeof(struct iphdr))
+ return;
- memset(dev->priv, 0, sizeof(struct net_device_stats));
+ /* Only DEST_UNREACH (minus the codes below) and TTL-exceeded fall through. */
+ switch (type) {
+ default:
+ case ICMP_PARAMETERPROB:
+ return;
- dev->hard_header = NULL;
- dev->rebuild_header = NULL;
- dev->set_mac_address = NULL;
- dev->hard_header_cache = NULL;
- dev->header_cache_update= NULL;
+ case ICMP_DEST_UNREACH:
+ switch (code) {
+ case ICMP_SR_FAILED:
+ case ICMP_PORT_UNREACH:
+ /* Impossible event. */
+ return;
+ case ICMP_FRAG_NEEDED:
+ /* Soft state for pmtu is maintained by IP core. */
+ return;
+ default:
+ /* All others are translated to HOST_UNREACH.
+ rfc2003 contains "deep thoughts" about NET_UNREACH,
+ I believe they are just ether pollution. --ANK
+ */
+ break;
+ }
+ break;
+ case ICMP_TIME_EXCEEDED:
+ if (code != ICMP_EXC_TTL)
+ return;
+ break;
+ }
- dev->type = ARPHRD_SIT;
+ /* The quoted header is one we sent, so its daddr is the tunnel's remote. */
+ t = ipip6_tunnel_lookup(iph->daddr, iph->saddr);
+ if (t == NULL || t->parms.iph.daddr == 0)
+ return;
+ if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
+ return;
- dev->hard_header_len = MAX_HEADER;
- dev->mtu = 1500 - sizeof(struct iphdr);
- dev->addr_len = 0;
- dev->tx_queue_len = 0;
+ /* Count errors that arrive within IPTUNNEL_ERR_TIMEO of the previous one;
+ otherwise restart the count at 1. */
+ if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
+ t->err_count++;
+ else
+ t->err_count = 1;
+ t->err_time = jiffies;
+ return;
+#else
+ struct iphdr *iph = (struct iphdr*)dp;
+ int hlen = iph->ihl<<2;
+ struct ipv6hdr *iph6;
+ int type = skb->h.icmph->type;
+ int code = skb->h.icmph->code;
+ int rel_type = 0;
+ int rel_code = 0;
+ int rel_info = 0;
+ struct sk_buff *skb2;
+ struct rt6_info *rt6i;
- memset(dev->broadcast, 0, MAX_ADDR_LEN);
- memset(dev->dev_addr, 0, MAX_ADDR_LEN);
+ /* Need the whole inner IPv6 header behind the quoted IPv4 header. */
+ if (len < hlen + sizeof(struct ipv6hdr))
+ return;
+ iph6 = (struct ipv6hdr*)(dp + hlen);
- dev->flags = IFF_NOARP;
- dev->iflink = 0;
+ switch (type) {
+ default:
+ return;
+ case ICMP_PARAMETERPROB:
+ if (skb->h.icmph->un.gateway < hlen)
+ return;
+
+ /* So... This guy found something strange INSIDE encapsulated
+ packet. Well, he is fool, but what can we do ?
+ */
+ rel_type = ICMPV6_PARAMPROB;
+ rel_info = skb->h.icmph->un.gateway - hlen;
+ break;
+
+ case ICMP_DEST_UNREACH:
+ switch (code) {
+ case ICMP_SR_FAILED:
+ case ICMP_PORT_UNREACH:
+ /* Impossible event. */
+ return;
+ case ICMP_FRAG_NEEDED:
+ /* Too complicated case ... */
+ return;
+ default:
+ /* All others are translated to HOST_UNREACH.
+ rfc2003 contains "deep thoughts" about NET_UNREACH,
+ I believe, it is just ether pollution. --ANK
+ */
+ rel_type = ICMPV6_DEST_UNREACH;
+ rel_code = ICMPV6_ADDR_UNREACH;
+ break;
+ }
+ break;
+ case ICMP_TIME_EXCEEDED:
+ if (code != ICMP_EXC_TTL)
+ return;
+ rel_type = ICMPV6_TIME_EXCEED;
+ rel_code = ICMPV6_EXC_HOPLIMIT;
+ break;
+ }
- return 0;
+ /* Prepare fake skb to feed it to icmpv6_send */
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (skb2 == NULL)
+ return;
+ dst_release(skb2->dst);
+ skb2->dst = NULL;
+ /* NOTE(review): the pull length is skb->data - iph6, which is negative if
+ iph6 lies after skb->data -- confirm the intended operand order. */
+ skb_pull(skb2, skb->data - (u8*)iph6);
+ skb2->nh.raw = skb2->data;
+
+ /* Try to guess incoming interface */
+ rt6i = rt6_lookup(&iph6->saddr, NULL, NULL, 0);
+ if (rt6i && rt6i->rt6i_dev) {
+ skb2->dev = rt6i->rt6i_dev;
+
+ rt6i = rt6_lookup(&iph6->daddr, &iph6->saddr, NULL, 0);
+
+ /* Relay only when the inner destination routes through a SIT device. */
+ if (rt6i && rt6i->rt6i_dev && rt6i->rt6i_dev->type == ARPHRD_SIT) {
+ struct ip_tunnel * t = (struct ip_tunnel*)rt6i->rt6i_dev->priv;
+ if (rel_type == ICMPV6_TIME_EXCEED && t->parms.iph.ttl) {
+ rel_type = ICMPV6_DEST_UNREACH;
+ rel_code = ICMPV6_ADDR_UNREACH;
+ }
+ icmpv6_send(skb2, rel_type, rel_code, rel_info, skb2->dev);
+ }
+ }
+ kfree_skb(skb2, FREE_WRITE);
+ return;
+#endif
}
-static int sit_init_vif(struct device *dev)
+/* Receive handler (first entry of sit_protocol).  Looks up the tunnel by the
+   outer source/destination addresses; on a match the outer header is pulled
+   off, the skb is re-labelled as an IPv6 packet arriving on the tunnel
+   device and handed to netif_rx().  Without a matching tunnel an ICMP
+   protocol-unreachable is sent and the skb is freed.  Always returns 0. */
+int ipip6_rcv(struct sk_buff *skb, unsigned short len)
{
- dev->flags = IFF_NOARP|IFF_POINTOPOINT;
- dev->iflink = 0;
- dev->priv = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL);
-
- if (dev->priv == NULL)
- return -ENOMEM;
-
- memset(dev->priv, 0, sizeof(struct net_device_stats));
-
- return 0;
-}
+ struct iphdr *iph;
+ struct ip_tunnel *tunnel;
+
+ iph = skb->nh.iph;
+
+ /* The sender's address is our "remote", the destination our "local". */
+ if ((tunnel = ipip6_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
+ /* Drop everything in front of h.raw; the new network header starts there. */
+ skb->nh.raw = skb_pull(skb, skb->h.raw - skb->data);
+ memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
+ skb->protocol = __constant_htons(ETH_P_IPV6);
+ skb->ip_summed = 0;
+ skb->pkt_type = PACKET_HOST;
+ tunnel->stat.rx_packets++;
+ tunnel->stat.rx_bytes += skb->len;
+ skb->dev = tunnel->dev;
+ dst_release(skb->dst);
+ skb->dst = NULL;
+ netif_rx(skb);
+ return 0;
+ }
-static int sit_open(struct device *dev)
-{
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0);
+ kfree_skb(skb, FREE_READ);
return 0;
}
-static int sit_close(struct device *dev)
-{
- return 0;
-}
+/*
+ * This function assumes it is being called from dev_queue_xmit()
+ * and that skb is filled properly by that function.
+ */
-__initfunc(int sit_init(void))
+/* Transmit hook of a SIT device: wrap the IPv6 packet in skb into an IPv4
+   header and send it with ip_send().
+   The outer destination is the tunnel's configured daddr; when that is 0 it
+   is taken from the low 32 bits of the IPv4-compatible neighbour (or packet
+   destination) address.  Packets that cannot be sent are counted in
+   tx_errors and freed, in some cases after an ICMPv6 error has been sent.
+   tunnel->recursion guards against re-entry; it is dropped again on every
+   exit path.  Always returns 0 and always consumes skb. */
+static int ipip6_tunnel_xmit(struct sk_buff *skb, struct device *dev)
{
- int i;
-
- /* register device */
+ struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+ struct net_device_stats *stats = &tunnel->stat;
+ struct iphdr *tiph = &tunnel->parms.iph;
+ struct ipv6hdr *iph6 = skb->nh.ipv6h;
+ u8 tos = tunnel->parms.iph.tos;
+ struct rtable *rt; /* Route to the other host */
+ struct device *tdev; /* Device to other host */
+ struct iphdr *iph; /* Our new IP header */
+ int max_headroom; /* The extra header space needed */
+ u32 dst = tiph->daddr;
+ int mtu;
+ struct in6_addr *addr6;
+ int addr_type;
- if (register_netdevice(&sit_device) != 0)
- return -EIO;
+ if (tunnel->recursion++) {
+ tunnel->stat.collisions++;
+ goto tx_error;
+ }
- inet_add_protocol(&sit_protocol);
+ if (skb->protocol != __constant_htons(ETH_P_IPV6))
+ goto tx_error;
- for (i=0; i < SIT_NUM_BUCKETS; i++)
- sit_mtu_cache[i] = NULL;
+ /* No fixed peer: derive the IPv4 destination from the IPv6 next hop. */
+ if (!dst) {
+ struct nd_neigh *neigh = NULL;
- sit_gc_last_run = jiffies;
+ if (skb->dst)
+ neigh = (struct nd_neigh *) skb->dst->neighbour;
- return 0;
-}
+ if (neigh == NULL) {
+ printk(KERN_DEBUG "sit: nexthop == NULL\n");
+ goto tx_error;
+ }
-struct device *sit_add_tunnel(__u32 dstaddr)
-{
- struct sit_vif *vif;
- struct device *dev;
+ addr6 = &neigh->ndn_addr;
+ addr_type = ipv6_addr_type(addr6);
- if ((sit_device.flags & IFF_UP) == 0)
- return NULL;
-
- vif = kmalloc(sizeof(struct sit_vif), GFP_KERNEL);
- if (vif == NULL)
- return NULL;
-
- /*
- * Create PtoP configured tunnel
- */
-
- dev = kmalloc(sizeof(struct device), GFP_KERNEL);
- if (dev == NULL)
- return NULL;
+ if (addr_type == IPV6_ADDR_ANY) {
+ addr6 = &skb->nh.ipv6h->daddr;
+ addr_type = ipv6_addr_type(addr6);
+ }
- memcpy(dev, &sit_device, sizeof(struct device));
- dev->init = sit_init_vif;
- SIT_PEER(dev) = dstaddr;
+ if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
+ goto tx_error_icmp;
- dev->name = vif->name;
- sprintf(vif->name, "sit%d", ++vif_num);
+ dst = addr6->s6_addr32[3];
+ }
- register_netdev(dev);
+ if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) {
+ tunnel->stat.tx_carrier_errors++;
+ goto tx_error_icmp;
+ }
+ tdev = rt->u.dst.dev;
- vif->dev = dev;
- vif->next = vif_list;
- vif_list = vif;
+ /* Routing the outer packet back into this tunnel would loop. */
+ if (tdev == dev) {
+ ip_rt_put(rt);
+ tunnel->stat.collisions++;
+ goto tx_error;
+ }
- return dev;
-}
+ mtu = rt->u.dst.pmtu - sizeof(struct iphdr);
+ if (mtu < 68) {
+ tunnel->stat.collisions++;
+ ip_rt_put(rt);
+ goto tx_error;
+ }
+ if (mtu >= 576) {
+ /* Propagate a smaller path MTU into the IPv6 route used by the sender. */
+ if (skb->dst && mtu < skb->dst->pmtu) {
+ struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
+ if (mtu < rt6->u.dst.pmtu) {
+ if (tunnel->parms.iph.daddr || rt6->rt6i_dst.plen == 128) {
+ rt6->rt6i_flags |= RTF_MODIFIED;
+ rt6->u.dst.pmtu = mtu;
+ }
+ }
+ }
+ if (skb->len > mtu) {
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
+ ip_rt_put(rt);
+ goto tx_error;
+ }
+ }
-void sit_cleanup(void)
-{
- struct sit_vif *vif;
+ /* Errors recorded by ipip6_err() are reported back one per packet while
+ they are still fresh; the packet itself is sent anyway. */
+ if (tunnel->err_count > 0) {
+ if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
+ tunnel->err_count--;
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
+ } else
+ tunnel->err_count = 0;
+ }
- for (vif = vif_list; vif;) {
- struct device *dev = vif->dev;
- struct sit_vif *cur;
+ skb->h.raw = skb->nh.raw;
- unregister_netdev(dev);
- kfree(dev->priv);
- kfree(dev);
-
- cur = vif;
- vif = vif->next;
+ /*
+ * Okay, now see if we can stuff it in the buffer as-is.
+ */
+ max_headroom = (((tdev->hard_header_len+15)&~15)+sizeof(struct iphdr));
- kfree(cur);
+ if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
+ struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
+ if (!new_skb) {
+ ip_rt_put(rt);
+ stats->tx_dropped++;
+ dev_kfree_skb(skb, FREE_WRITE);
+ tunnel->recursion--;
+ return 0;
+ }
+ dev_kfree_skb(skb, FREE_WRITE);
+ skb = new_skb;
+ /* The old buffer is gone: iph6 must point into the new skb before
+ hop_limit is read below. */
+ iph6 = skb->nh.ipv6h;
}
- vif_list = NULL;
-
- unregister_netdev(&sit_device);
- inet_del_protocol(&sit_protocol);
-
-}
+ skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+ dst_release(skb->dst);
+ skb->dst = &rt->u.dst;
-/*
- * receive IPv4 ICMP messages
- */
+ /*
+ * Push down and install the IPIP header.
+ */
-static void sit_err(struct sk_buff *skb, unsigned char *dp, int len)
-{
- struct iphdr *iph = (struct iphdr*)dp;
- int type = skb->h.icmph->type;
- int code = skb->h.icmph->code;
+ iph = skb->nh.iph;
+ iph->version = 4;
+ iph->ihl = sizeof(struct iphdr)>>2;
+ if (mtu > 576)
+ iph->frag_off = __constant_htons(IP_DF);
+ else
+ iph->frag_off = 0;
- if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
- struct sit_mtu_info *minfo;
- unsigned short info = skb->h.icmph->un.frag.mtu - sizeof(struct iphdr);
+ iph->protocol = IPPROTO_IPV6;
+ iph->tos = tos;
+ iph->daddr = rt->rt_dst;
+ iph->saddr = rt->rt_src;
- minfo = sit_mtu_lookup(iph->daddr);
+ /* A configured ttl of 0 means: inherit the inner packet's hop limit. */
+ if ((iph->ttl = tiph->ttl) == 0)
+ iph->ttl = iph6->hop_limit;
- printk(KERN_DEBUG "sit: %08lx pmtu = %ul\n", ntohl(iph->saddr),
- info);
+ iph->tot_len = htons(skb->len);
+ iph->id = htons(ip_id_count++);
+ ip_send_check(iph);
- if (minfo == NULL) {
- minfo = kmalloc(sizeof(struct sit_mtu_info),
- GFP_ATOMIC);
+ stats->tx_bytes += skb->len;
+ stats->tx_packets++;
+ ip_send(skb);
- if (minfo == NULL)
- return;
+ tunnel->recursion--;
+ return 0;
- start_bh_atomic();
- sit_cache_insert(iph->daddr, info);
- end_bh_atomic();
- } else {
- minfo->mtu = info;
- }
- }
+tx_error_icmp:
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, dev);
+tx_error:
+ stats->tx_errors++;
+ dev_kfree_skb(skb, FREE_WRITE);
+ tunnel->recursion--;
+ return 0;
}
-static int sit_rcv(struct sk_buff *skb, unsigned short len)
+/* Device ioctl hook for tunnel management.  ifr->ifr_ifru.ifru_data points
+   at a user-space struct ip_tunnel_parm.
+   SIOCGETTUNNEL: copy a tunnel's parameters to the caller; on the fallback
+   device the caller's parameters select the tunnel, with the device's own
+   tunnel used when nothing matches.
+   SIOCADDTUNNEL / SIOCCHGTUNNEL: validate the header template, then create
+   the tunnel (ADD) or update ttl/tos of an existing one (CHG).
+   SIOCDELTUNNEL: unregister dev, or, on the fallback device, the tunnel
+   described by the caller's parameters (never the fallback itself).
+   Returns 0 or a negative errno.  The module use count is held for the
+   duration of the call. */
+static int
+ipip6_tunnel_ioctl (struct device *dev, struct ifreq *ifr, int cmd)
{
- struct net_device_stats *stats;
- struct device *dev = NULL;
- struct sit_vif *vif;
- __u32 saddr = skb->nh.iph->saddr;
-
- skb->h.raw = skb->nh.raw = skb_pull(skb, skb->h.raw - skb->data);
-
- skb->protocol = __constant_htons(ETH_P_IPV6);
-
- for (vif = vif_list; vif; vif = vif->next) {
- if (saddr == SIT_PEER(vif->dev)) {
- dev = vif->dev;
- break;
+ int err = 0;
+ struct ip_tunnel_parm p;
+ struct ip_tunnel *t;
+
+ MOD_INC_USE_COUNT;
+
+ switch (cmd) {
+ case SIOCGETTUNNEL:
+ t = NULL;
+ if (dev == &ipip6_fb_tunnel_dev) {
+ /* The lookup key comes from the caller; fetch it before using p. */
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+ err = -EFAULT;
+ break;
+ }
+ t = ipip6_tunnel_locate(&p, 0);
+ }
+ if (t == NULL)
+ t = (struct ip_tunnel*)dev->priv;
+ memcpy(&p, &t->parms, sizeof(p));
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ err = -EFAULT;
+ break;
+
+ case SIOCADDTUNNEL:
+ case SIOCCHGTUNNEL:
+ err = -EFAULT;
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ goto done;
+
+ /* Only a plain 20-byte IPv4 header carrying IPv6 is accepted; of the
+ fragment bits only DF may be set. */
+ err = -EINVAL;
+ if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPV6 ||
+ p.iph.ihl != 5 || (p.iph.frag_off&__constant_htons(~IP_DF)))
+ goto done;
+ if (p.iph.ttl)
+ p.iph.frag_off |= __constant_htons(IP_DF);
+
+ t = ipip6_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
+
+ if (t) {
+ err = 0;
+ if (cmd == SIOCCHGTUNNEL) {
+ t->parms.iph.ttl = p.iph.ttl;
+ t->parms.iph.tos = p.iph.tos;
+ }
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
+ err = -EFAULT;
+ } else
+ err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
+ break;
+
+ case SIOCDELTUNNEL:
+ if (dev == &ipip6_fb_tunnel_dev) {
+ err = -EFAULT;
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ goto done;
+ err = -ENOENT;
+ if ((t = ipip6_tunnel_locate(&p, 0)) == NULL)
+ goto done;
+ err = -EPERM;
+ if (t == &ipip6_fb_tunnel)
+ goto done;
+ /* Delete the tunnel that was asked for, not the fallback device. */
+ dev = t->dev;
}
- }
+ err = unregister_netdevice(dev);
+ break;
- if (dev == NULL)
- dev = &sit_device;
+ default:
+ err = -EINVAL;
+ }
- skb->dev = dev;
- skb->ip_summed = CHECKSUM_NONE;
+done:
+ MOD_DEC_USE_COUNT;
+ return err;
+}
- stats = (struct net_device_stats *)dev->priv;
- stats->rx_bytes += len;
- stats->rx_packets++;
+/* get_stats hook: the counters live in the ip_tunnel stored in dev->priv. */
+static struct net_device_stats *ipip6_tunnel_get_stats(struct device *dev)
+{
+ return &(((struct ip_tunnel*)dev->priv)->stat);
+}
- ipv6_rcv(skb, dev, NULL);
+/* change_mtu hook.  Accepts new_mtu in [576, 0xFFF8 - sizeof(struct iphdr)]
+   and stores it in dev->mtu; anything else yields -EINVAL.  Returns 0 on
+   success. */
+static int ipip6_tunnel_change_mtu(struct device *dev, int new_mtu)
+{
+ if (new_mtu < 576 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
+ return -EINVAL;
+ dev->mtu = new_mtu;
return 0;
}
-static int sit_xmit(struct sk_buff *skb, struct device *dev)
+/* Initialisation shared by the fallback device and dynamically created
+   tunnels: install the driver hooks and set the SIT defaults.  The 4-byte
+   device and broadcast addresses are the tunnel's local and remote IPv4
+   addresses. */
+static void ipip6_tunnel_init_gen(struct device *dev)
{
- struct net_device_stats *stats;
- struct sit_mtu_info *minfo;
- struct in6_addr *addr6;
- struct rtable *rt;
- struct iphdr *iph;
- __u32 saddr;
- __u32 daddr;
- int addr_type;
- int mtu;
- int headroom;
+ struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
- /*
- * Make sure we are not busy (check lock variable)
- */
+ dev->destructor = ipip6_tunnel_destroy;
+ dev->hard_start_xmit = ipip6_tunnel_xmit;
+ dev->get_stats = ipip6_tunnel_get_stats;
+ dev->do_ioctl = ipip6_tunnel_ioctl;
+ dev->change_mtu = ipip6_tunnel_change_mtu;
- stats = (struct net_device_stats *)dev->priv;
+ dev_init_buffers(dev);
- daddr = SIT_PEER(dev);
+ dev->type = ARPHRD_SIT;
+ dev->hard_header_len = MAX_HEADER + sizeof(struct iphdr);
+ /* Default MTU: 1500 less the outer IPv4 header. */
+ dev->mtu = 1500 - sizeof(struct iphdr);
+ dev->flags = IFF_NOARP;
+ dev->iflink = 0;
+ dev->addr_len = 4;
+ memcpy(dev->dev_addr, &t->parms.iph.saddr, 4);
+ memcpy(dev->broadcast, &t->parms.iph.daddr, 4);
+}
- if (daddr == 0) {
- struct nd_neigh *neigh = NULL;
+/* init hook of dynamically created tunnels (set in ipip6_tunnel_locate).
+   After the generic setup it tries to find the underlying device -- via a
+   route to the configured remote address, else via parms.link -- and derives
+   hard_header_len and mtu from it.  A tunnel with a remote address is marked
+   IFF_POINTOPOINT.  Always returns 0. */
+static int ipip6_tunnel_init(struct device *dev)
+{
+ struct device *tdev = NULL;
+ struct ip_tunnel *tunnel;
+ struct iphdr *iph;
- if (skb->dst)
- neigh = (struct nd_neigh *) skb->dst->neighbour;
+ tunnel = (struct ip_tunnel*)dev->priv;
+ iph = &tunnel->parms.iph;
- if (neigh == NULL) {
- printk(KERN_DEBUG "sit: nexthop == NULL\n");
- goto on_error;
- }
-
- addr6 = &neigh->ndn_addr;
- addr_type = ipv6_addr_type(addr6);
-
- if (addr_type == IPV6_ADDR_ANY) {
- addr6 = &skb->nh.ipv6h->daddr;
- addr_type = ipv6_addr_type(addr6);
- }
+ ipip6_tunnel_init_gen(dev);
- if ((addr_type & IPV6_ADDR_COMPATv4) == 0) {
- printk(KERN_DEBUG "sit_xmit: non v4 address\n");
- goto on_error;
+ if (iph->daddr) {
+ struct rtable *rt;
+ /* The route is only used to learn the output device. */
+ if (!ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link)) {
+ tdev = rt->u.dst.dev;
+ ip_rt_put(rt);
}
- daddr = addr6->s6_addr32[3];
+ dev->flags |= IFF_POINTOPOINT;
}
- if (ip_route_output(&rt, daddr, 0, 0, 0)) {
- printk(KERN_DEBUG "sit: no route to host\n");
- goto on_error;
- }
-
- minfo = sit_mtu_lookup(daddr);
-
-#if 0
- if (minfo)
- mtu = minfo->mtu;
- else
-#endif
- mtu = rt->u.dst.pmtu;
+ if (!tdev && tunnel->parms.link)
+ tdev = dev_get_by_index(tunnel->parms.link);
- if (mtu > 576 && skb->tail - (skb->data + sizeof(struct ipv6hdr)) > mtu) {
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
- ip_rt_put(rt);
- goto on_error;
+ if (tdev) {
+ dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
+ dev->mtu = tdev->mtu - sizeof(struct iphdr);
+ /* Never advertise less than 576, the floor ipip6_tunnel_change_mtu enforces. */
+ if (dev->mtu < 576)
+ dev->mtu = 576;
}
+ dev->iflink = tunnel->parms.link;
- headroom = ((rt->u.dst.dev->hard_header_len+15)&~15)+sizeof(struct iphdr);
-
- if (skb_headroom(skb) < headroom || skb_shared(skb)) {
- struct sk_buff *new_skb = skb_realloc_headroom(skb, headroom);
- if (!new_skb) {
- ip_rt_put(rt);
- goto on_error;
- }
- dev_kfree_skb(skb, FREE_WRITE);
- skb = new_skb;
- }
-
- memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
-
- iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr));
- skb->nh.iph = iph;
+ return 0;
+}
- saddr = rt->rt_src;
- dst_release(skb->dst);
- skb->dst = &rt->u.dst;
+/* Module builds only: the fallback device holds one module use count for as
+   long as it is open. */
+#ifdef MODULE
+static int ipip6_fb_tunnel_open(struct device *dev)
+{
+ MOD_INC_USE_COUNT;
+ return 0;
+}
- iph->version = 4;
- iph->ihl = 5;
- iph->tos = 0; /* tos set to 0... */
+static int ipip6_fb_tunnel_close(struct device *dev)
+{
+ MOD_DEC_USE_COUNT;
+ return 0;
+}
+#endif
- if (mtu > 576)
- iph->frag_off = htons(IP_DF);
- else
- iph->frag_off = 0;
-
- iph->ttl = 64;
- iph->saddr = saddr;
- iph->daddr = daddr;
- iph->protocol = IPPROTO_IPV6;
- iph->tot_len = htons(skb->len);
- iph->id = htons(ip_id_count++);
- ip_send_check(iph);
+/* init hook of the fallback device.  Performs the generic tunnel setup,
+   installs the use-count open/stop handlers in module builds, fills in the
+   fallback tunnel's IPv4 header template (version 4, protocol IPv6, ihl 5,
+   ttl 64) and publishes the tunnel in the wildcard slot.  Always returns 0. */
+__initfunc(int ipip6_fb_tunnel_init(struct device *dev))
+{
+ struct iphdr *iph;
- ip_send(skb);
+ ipip6_tunnel_init_gen(dev);
+#ifdef MODULE
+ dev->open = ipip6_fb_tunnel_open;
+ /* The close hook of struct device is "stop"; there is no "close" member. */
+ dev->stop = ipip6_fb_tunnel_close;
+#endif
- stats->tx_bytes += skb->len;
- stats->tx_packets++;
+ iph = &ipip6_fb_tunnel.parms.iph;
+ iph->version = 4;
+ iph->protocol = IPPROTO_IPV6;
+ iph->ihl = 5;
+ iph->ttl = 64;
+ tunnels_wc[0] = &ipip6_fb_tunnel;
return 0;
+}
-on_error:
- dev_kfree_skb(skb, FREE_WRITE);
- stats->tx_errors++;
- return 0;
+/* Protocol entry passed to inet_add_protocol() by sit_init(): ipip6_rcv as
+   receive handler and ipip6_err as error handler for IPPROTO_IPV6, labelled
+   "IPv6".  The remaining positional fields are left 0/NULL (their meaning is
+   defined by struct inet_protocol, not visible here). */
+static struct inet_protocol sit_protocol = {
+ ipip6_rcv,
+ ipip6_err,
+ 0,
+ IPPROTO_IPV6,
+ 0,
+ NULL,
+ "IPv6"
+};
+
+/* Module teardown: stop receiving protocol 41 first, then unregister the
+   fallback device. */
+#ifdef MODULE
+void sit_cleanup(void)
+{
+ inet_del_protocol(&sit_protocol);
+ unregister_netdevice(&ipip6_fb_tunnel_dev);
}
+#endif
-static struct net_device_stats *sit_get_stats(struct device *dev)
+/* Driver initialisation: wire the fallback device to its tunnel state,
+   register it and hook the receive/error handlers into the IPv4 protocol
+   table.  Always returns 0.
+   NOTE(review): the result of register_netdev()/register_netdevice() is
+   ignored, whereas the removed code returned -EIO on failure -- confirm
+   this is intended. */
+__initfunc(int sit_init(void))
{
- return((struct net_device_stats *) dev->priv);
+ printk(KERN_INFO "IPv6 over IPv4 tunneling driver\n");
+
+ ipip6_fb_tunnel_dev.priv = (void*)&ipip6_fb_tunnel;
+ ipip6_fb_tunnel_dev.name = ipip6_fb_tunnel.parms.name;
+#ifdef MODULE
+ register_netdev(&ipip6_fb_tunnel_dev);
+#else
+ register_netdevice(&ipip6_fb_tunnel_dev);
+#endif
+ inet_add_protocol(&sit_protocol);
+ return 0;
}