LVS
lvs-devel
Google
 
Web LinuxVirtualServer.org

[PATCH 1/2] ipvs: load balance IPv4 connections from a local process

To: netdev@xxxxxxxxxxxxxxx, lvs-devel@xxxxxxxxxxxxxxx
Subject: [PATCH 1/2] ipvs: load balance IPv4 connections from a local process
Cc: Malcolm Turnbull <malcolm@xxxxxxxxxxxxxxxx>, Siim Põder <siim@xxxxxxxxxxxxxxx>, Julius Volz <juliusv@xxxxxxxxxx>, Vince Busam <vbusam@xxxxxxxxxx>
From: Simon Horman <horms@xxxxxxxxxxxx>
Date: Fri, 5 Sep 2008 11:36:11 +1000
From: Malcolm Turnbull <malcolm@xxxxxxxxxxxxxxxx>

ipvs: load balance IPv4 connections from a local process

This allows IPVS to load balance connections made by a local process.
For example a proxy server running locally.

External client --> pound:443 -> Local:443 --> IPVS:80 --> RealServer

Signed-off-by: Siim Põder <siim@xxxxxxxxxxxxxxx>
Signed-off-by: Malcolm Turnbull <malcolm@xxxxxxxxxxxxxxxx>
Signed-off-by: Simon Horman <horms@xxxxxxxxxxxx>

--- 

 net/ipv4/ipvs/ip_vs_core.c      |  224 ++++++++++++++++++++++-----------------
 net/ipv4/ipvs/ip_vs_proto_tcp.c |    4 
 2 files changed, 134 insertions(+), 94 deletions(-)

* Simon Horman, Wed, 03 Sep 2008 14:50:36 +1000

  I have updated this patch so that it will apply on top
  of the current IPv6 patches.

  http://marc.info/?l=linux-netdev&m=122036407428246&w=2

  I have also updated the patch so that it does not handle IPv6 packets.

  I have an additional patch that I will provide to extend
  the code to handle IPv6 connections.

* Simon Horman, Fri, 05 Sep 2008 11:32:38 +1000

  I have applied this patch to the net-next-2.6 branch of lvs-2.6

  git://git.kernel.org/pub/scm/linux/kernel/git/horms/lvs-2.6.git

Index: lvs-2.6/net/ipv4/ipvs/ip_vs_core.c
===================================================================
--- lvs-2.6.orig/net/ipv4/ipvs/ip_vs_core.c     2008-09-03 11:01:38.000000000 +1000
+++ lvs-2.6/net/ipv4/ipvs/ip_vs_core.c  2008-09-03 12:17:49.000000000 +1000
@@ -651,12 +651,53 @@ void ip_vs_nat_icmp_v6(struct sk_buff *s
 }
 #endif
 
+/* Handle relevant response ICMP messages (used for NAT and local client):
+ * NAT the packet and put cp.  Returns NF_ACCEPT, or NF_DROP on failure.
+ */
+static int handle_response_icmp(struct sk_buff *skb, struct iphdr *iph,
+                               struct iphdr *cih, struct ip_vs_conn *cp,
+                               struct ip_vs_protocol *pp,
+                               unsigned int offset, unsigned int ihl)
+{
+       unsigned int verdict = NF_DROP; /* stays NF_DROP on any "goto out" */
+
+       if (IP_VS_FWD_METHOD(cp) != 0) {
+               IP_VS_ERR("shouldn't reach here, because the box is on the "
+                         "half connection in the tun/dr module.\n");
+       } /* only logged: processing continues below */
+
+       /* Ensure the checksum is correct */
+       if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
+               /* Failed checksum! */
+               IP_VS_DBG(1,
+                         "Forward ICMP: failed checksum from %d.%d.%d.%d!\n",
+                         NIPQUAD(iph->saddr));
+               goto out;
+       }
+
+       if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
+               offset += 2 * sizeof(__u16); /* presumably the two ports */
+       if (!skb_make_writable(skb, offset))
+               goto out;
+
+       ip_vs_nat_icmp(skb, pp, cp, 1);
+
+       /* do the statistics and put it back */
+       ip_vs_out_stats(cp, skb);
+
+       skb->ipvs_property = 1;
+       verdict = NF_ACCEPT;
+
+out:
+       __ip_vs_conn_put(cp); /* reached on both success and failure */
+
+       return verdict;
+}
+
 /*
  *     Handle ICMP messages in the inside-to-outside direction (outgoing).
- *     Find any that might be relevant, check against existing connections,
- *     forward to the right destination host if relevant.
+ *     Find any that might be relevant, check against existing connections.
  *     Currently handles error types - unreachable, quench, ttl exceeded.
- *     (Only used in VS/NAT)
  */
 static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
 {
@@ -666,7 +707,7 @@ static int ip_vs_out_icmp(struct sk_buff
        struct ip_vs_iphdr ciph;
        struct ip_vs_conn *cp;
        struct ip_vs_protocol *pp;
-       unsigned int offset, ihl, verdict;
+       unsigned int offset, ihl;
 
        *related = 1;
 
@@ -725,38 +766,7 @@ static int ip_vs_out_icmp(struct sk_buff
        if (!cp)
                return NF_ACCEPT;
 
-       verdict = NF_DROP;
-
-       if (IP_VS_FWD_METHOD(cp) != 0) {
-               IP_VS_ERR("shouldn't reach here, because the box is on the "
-                         "half connection in the tun/dr module.\n");
-       }
-
-       /* Ensure the checksum is correct */
-       if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
-               /* Failed checksum! */
-               IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n",
-                         NIPQUAD(iph->saddr));
-               goto out;
-       }
-
-       if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
-               offset += 2 * sizeof(__u16);
-       if (!skb_make_writable(skb, offset))
-               goto out;
-
-       ip_vs_nat_icmp(skb, pp, cp, 1);
-
-       /* do the statistics and put it back */
-       ip_vs_out_stats(cp, skb);
-
-       skb->ipvs_property = 1;
-       verdict = NF_ACCEPT;
-
-  out:
-       __ip_vs_conn_put(cp);
-
-       return verdict;
+       return handle_response_icmp(skb, iph, cih, cp, pp, offset, ihl);
 }
 
 #ifdef CONFIG_IP_VS_IPV6
@@ -875,10 +885,76 @@ static inline int is_tcp_reset(const str
        return th->rst;
 }
 
+/* Handle response packets: rewrite addresses and send away...
+ * Used for NAT and local client.
+ *
+ * The first ihl bytes of the skb are made writable, the source address
+ * is rewritten to the virtual address of cp and the routing information
+ * is re-computed.  The reference on cp is released on every path.
+ *
+ * Returns NF_ACCEPT on success.  On failure the skb is freed here and
+ * NF_STOLEN is returned.
+ */
+static unsigned int
+handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+               struct ip_vs_conn *cp, int ihl)
+{
+       IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
+
+       if (!skb_make_writable(skb, ihl))
+               goto drop;
+
+       /* mangle the packet */
+       if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
+               goto drop;
+
+#ifdef CONFIG_IP_VS_IPV6
+       if (af == AF_INET6)
+               ipv6_hdr(skb)->saddr = cp->vaddr.in6;
+       else
+#endif
+       {
+               ip_hdr(skb)->saddr = cp->vaddr.ip;
+               ip_send_check(ip_hdr(skb));
+       }
+
+       /* For policy routing, packets originating from this
+        * machine itself may be routed differently to packets
+        * passing through.  We want this packet to be routed as
+        * if it came from this machine itself.  So re-compute
+        * the routing information.  Only the helper matching the
+        * address family may be used: ip_route_me_harder() must
+        * not be run on an IPv6 packet.
+        */
+#ifdef CONFIG_IP_VS_IPV6
+       if (af == AF_INET6) {
+               if (ip6_route_me_harder(skb) != 0)
+                       goto drop;
+       } else
+#endif
+               if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
+                       goto drop;
+
+       IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
+
+       ip_vs_out_stats(cp, skb);
+       ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
+       ip_vs_conn_put(cp);
+
+       skb->ipvs_property = 1;
+
+       LeaveFunction(11);
+       return NF_ACCEPT;
+
+drop:
+       ip_vs_conn_put(cp);
+       kfree_skb(skb);
+       return NF_STOLEN;
+}
+
 /*
  *     It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
- *     Check if outgoing packet belongs to the established ip_vs_conn,
- *      rewrite addresses of the packet and send it on its way...
+ *     Check if outgoing packet belongs to the established ip_vs_conn.
  */
 static unsigned int
 ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
@@ -987,55 +1063,7 @@ ip_vs_out(unsigned int hooknum, struct s
                return NF_ACCEPT;
        }
 
-       IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
-
-       if (!skb_make_writable(skb, iph.len))
-               goto drop;
-
-       /* mangle the packet */
-       if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
-               goto drop;
-
-#ifdef CONFIG_IP_VS_IPV6
-       if (af == AF_INET6)
-               ipv6_hdr(skb)->saddr = cp->vaddr.in6;
-       else
-#endif
-       {
-               ip_hdr(skb)->saddr = cp->vaddr.ip;
-               ip_send_check(ip_hdr(skb));
-       }
-
-       /* For policy routing, packets originating from this
-        * machine itself may be routed differently to packets
-        * passing through.  We want this packet to be routed as
-        * if it came from this machine itself.  So re-compute
-        * the routing information.
-        */
-#ifdef CONFIG_IP_VS_IPV6
-       if (af == AF_INET6) {
-               if (ip6_route_me_harder(skb) != 0)
-                       goto drop;
-       } else
-#endif
-               if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
-                       goto drop;
-
-       IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
-
-       ip_vs_out_stats(cp, skb);
-       ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
-       ip_vs_conn_put(cp);
-
-       skb->ipvs_property = 1;
-
-       LeaveFunction(11);
-       return NF_ACCEPT;
-
-  drop:
-       ip_vs_conn_put(cp);
-       kfree_skb(skb);
-       return NF_STOLEN;
+       return handle_response(af, skb, pp, cp, iph.len);
 }
 
 
@@ -1111,8 +1139,14 @@ ip_vs_in_icmp(struct sk_buff *skb, int *
        ip_vs_fill_iphdr(AF_INET, cih, &ciph);
        /* The embedded headers contain source and dest in reverse order */
        cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1);
-       if (!cp)
+       if (!cp) {
+               /* The packet could also belong to a local client */
+               cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
+               if (cp)
+                       return handle_response_icmp(skb, iph, cih, cp, pp,
+                                                   offset, ihl);
                return NF_ACCEPT;
+       }
 
        verdict = NF_DROP;
 
@@ -1244,11 +1278,12 @@ ip_vs_in(unsigned int hooknum, struct sk
        ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
 
        /*
-        *      Big tappo: only PACKET_HOST (neither loopback nor mcasts)
-        *      ... don't know why 1st test DOES NOT include 2nd (?)
+        *      Big tappo: only PACKET_HOST, including loopback for local client
+        *      Don't handle local packets on IPv6 for now
         */
-       if (unlikely(skb->pkt_type != PACKET_HOST
-                    || skb->dev->flags & IFF_LOOPBACK || skb->sk)) {
+       if (unlikely(skb->pkt_type != PACKET_HOST ||
+                    (af == AF_INET6 || (skb->dev->flags & IFF_LOOPBACK ||
+                                        skb->sk)))) {
                IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n",
                              skb->pkt_type,
                              iph.protocol,
@@ -1277,6 +1312,11 @@ ip_vs_in(unsigned int hooknum, struct sk
        if (unlikely(!cp)) {
                int v;
 
+               /* For local client packets, it could be a response */
+               cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
+               if (cp)
+                       return handle_response(af, skb, pp, cp, iph.len);
+
                if (!pp->conn_schedule(af, skb, pp, &v, &cp))
                        return v;
        }
Index: lvs-2.6/net/ipv4/ipvs/ip_vs_proto_tcp.c
===================================================================
--- lvs-2.6.orig/net/ipv4/ipvs/ip_vs_proto_tcp.c        2008-09-03 10:56:05.000000000 +1000
+++ lvs-2.6/net/ipv4/ipvs/ip_vs_proto_tcp.c     2008-09-03 11:24:26.000000000 +1000
@@ -166,7 +166,7 @@ tcp_snat_handler(struct sk_buff *skb,
        tcph->source = cp->vport;
 
        /* Adjust TCP checksums */
-       if (!cp->app) {
+       if (!cp->app && (tcph->check != 0)) {
                /* Only port and addr are changed, do fast csum update */
                tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
                                     cp->dport, cp->vport);
@@ -235,7 +235,7 @@ tcp_dnat_handler(struct sk_buff *skb,
        /*
         *      Adjust TCP checksums
         */
-       if (!cp->app) {
+       if (!cp->app && (tcph->check != 0)) {
                /* Only port and addr are changed, do fast csum update */
                tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
                                     cp->vport, cp->dport);
--
To unsubscribe from this list: send the line "unsubscribe lvs-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

<Prev in Thread] Current Thread [Next in Thread>