LVS
lvs-users
Google
 
Web LinuxVirtualServer.org

LVS with mark tracking

To: lvs-users@xxxxxxxxxxxxxxxxxxxxxx
Subject: LVS with mark tracking
From: Henrik Nordstrom <hno@xxxxxxxxxxx>
Date: Wed, 14 Feb 2001 17:29:47 +0100
Hi.

Here is a small patch to make LVS keep the MARK, and have return traffic
inherit the mark.

We use this for routing purposes on a multihomed LVS server, so that
return traffic is routed back out the same way the request came in.
We set the mark in the iptables mangle chain depending on the source
interface, and in the routing table we use this mark to have return
traffic routed back in the same (opposite) direction.

The patch also moves the priority of the LVS INPUT hook back in front of
the iptables filter hook, so that we can filter traffic that is not picked
up by LVS but matches its service definitions. We are not (yet)
interested in filtering traffic to the virtual servers, but we are very
interested in filtering which traffic reaches the Linux LVS box itself.

Regards
Henrik Nordstrom
SafeCore Technologies
diff -u -r ipvs-0.2.3/ipvs/Makefile ipvs-0.2.3-mara/ipvs/Makefile
--- ipvs-0.2.3/ipvs/Makefile    Mon Jan 15 14:43:46 2001
+++ ipvs-0.2.3-mara/ipvs/Makefile       Mon Feb 12 20:18:43 2001
@@ -9,8 +9,8 @@
 
 CC=gcc
 CFLAGS= -D__KERNEL__ -DMODULE $(SMPFLAGS) $(DEBUGFLAGS) -O2 -Wall \
-       -Wstrict-prototypes -I/usr/src/linux/include \
-       -DMODVERSIONS -include /usr/src/linux/include/linux/modversions.h
+       -Wstrict-prototypes -I/develop/maraimages/HEAD.scsi/build/linux-2.4.1-mara.4/include \
+       -DMODVERSIONS -include /develop/maraimages/HEAD.scsi/build/linux-2.4.1-mara.4/include/linux/modversions.h
 
 all:   ip_vs.o ip_vs_lc.o ip_vs_rr.o ip_vs_wlc.o ip_vs_wrr.o \
        ip_vs_lblc.o ip_vs_lblcr.o ip_vs_ftp.o
diff -u -r ipvs-0.2.3/ipvs/ip_vs.h ipvs-0.2.3-mara/ipvs/ip_vs.h
--- ipvs-0.2.3/ipvs/ip_vs.h     Mon Jan 29 16:29:29 2001
+++ ipvs-0.2.3-mara/ipvs/ip_vs.h        Mon Feb 12 20:17:53 2001
@@ -239,6 +239,7 @@
         __u32                   caddr;          /* client address */
         __u32                   vaddr;          /* virtual address */
         __u32                   daddr;          /* destination address */
+        __u32                   fwmark;         /* fwmark of the session */
         __u16                   cport;
         __u16                   vport;
         __u16                   dport;
@@ -365,7 +366,8 @@
  *      (from ip_vs_core.c)
  */
 extern struct ip_vs_conn * ip_vs_schedule(struct ip_vs_service *svc,
-                                          struct iphdr *iph);
+                                          struct iphdr *iph,
+                                          unsigned int fwmark);
 extern int ip_vs_proto_doff(unsigned proto, char *th, unsigned size);
 extern const char *ip_vs_proto_name(unsigned proto);
 extern unsigned int check_for_ip_vs_out(struct sk_buff **skb_p,
@@ -466,6 +468,8 @@
                   NIPQUAD(cp->caddr),ntohs(cp->cport),
                   NIPQUAD(ctl_cp->caddr),ntohs(ctl_cp->cport));
         
+       if (!cp->fwmark)
+           cp->fwmark = ctl_cp->fwmark;
        cp->control = ctl_cp;
        atomic_inc(&ctl_cp->n_control);
 }
diff -u -r ipvs-0.2.3/ipvs/ip_vs_conn.c ipvs-0.2.3-mara/ipvs/ip_vs_conn.c
--- ipvs-0.2.3/ipvs/ip_vs_conn.c        Fri Jan 26 11:19:22 2001
+++ ipvs-0.2.3-mara/ipvs/ip_vs_conn.c   Mon Feb 12 20:17:53 2001
@@ -1341,13 +1341,14 @@
 
                         size = sprintf(buffer+len,
                                        "%-3s %08X %04X %08X %04X "
-                                       "%08X %04X %-11s %7lu\n",
+                                       "%08X %04X %-11s %7lu %04d\n",
                                        ip_vs_proto_name(cp->protocol),
                                        ntohl(cp->caddr), ntohs(cp->cport),
                                        ntohl(cp->vaddr), ntohs(cp->vport),
                                        ntohl(cp->daddr), ntohs(cp->dport),
                                        ip_vs_state_name(cp->state),
-                                       cp->timer.expires-jiffies);
+                                       cp->timer.expires-jiffies,
+                                      (int)cp->fwmark);
                         len += size;
                         pos += size;
                         if (pos <= offset)
diff -u -r ipvs-0.2.3/ipvs/ip_vs_core.c ipvs-0.2.3-mara/ipvs/ip_vs_core.c
--- ipvs-0.2.3/ipvs/ip_vs_core.c        Mon Jan 29 16:29:29 2001
+++ ipvs-0.2.3-mara/ipvs/ip_vs_core.c   Mon Feb 12 21:27:36 2001
@@ -363,7 +363,7 @@
  *  It selects a server according to the virtual service, and
  *  creates a connection entry.
  */
-struct ip_vs_conn *ip_vs_schedule(struct ip_vs_service *svc, struct iphdr *iph)
+struct ip_vs_conn *ip_vs_schedule(struct ip_vs_service *svc, struct iphdr *iph, unsigned int fwmark)
 {
        struct ip_vs_conn *cp = NULL;
        struct ip_vs_dest *dest;
@@ -405,6 +405,7 @@
                IP_VS_ERR("Schedule: ip_vs_conn_new failed\n");
                return NULL;
        }
+       cp->fwmark = fwmark;
 
        /*
         *    Bind the connection entry with the vs dest.
@@ -679,6 +680,7 @@
         cp = ip_vs_conn_out_get(iph->protocol, iph->saddr, h.portp[0],
                                 iph->daddr, h.portp[1]);
         if (!cp) {
+#if 0
                if (ip_vs_lookup_real_service(iph->protocol,
                                               iph->saddr, h.portp[0])) {
                        /*
@@ -692,6 +694,7 @@
                                return NF_STOLEN;
                        }
                }
+#endif
                 IP_VS_DBG(12, "packet for %s %d.%d.%d.%d:%d "
                           "continue traversal as normal.\n",
                           ip_vs_proto_name(iph->protocol),
@@ -999,6 +1002,46 @@
 
 
 /*
+ *     Check if it's an established session. If so, then restore
+ *     the fwmark value
+ */
+static unsigned int ip_vs_in2(unsigned int hooknum,
+                            struct sk_buff **skb_p,
+                            const struct net_device *in,
+                            const struct net_device *out,
+                            int (*okfn)(struct sk_buff *))
+{
+       struct sk_buff  *skb = *skb_p;
+       struct iphdr    *iph = skb->nh.iph;
+       union ip_vs_tphdr h;
+       struct ip_vs_conn *cp;
+
+       h.raw = (char*) iph + iph->ihl * 4;
+
+       IP_VS_DBG(2, "Incoming2 %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d\n",
+                 ip_vs_proto_name(iph->protocol),
+                 NIPQUAD(iph->saddr), ntohs(h.portp[0]),
+                 NIPQUAD(iph->daddr), ntohs(h.portp[1]));
+
+       /*
+        * Is there an established session?
+        */
+        cp = ip_vs_conn_out_get(iph->protocol, iph->saddr, h.portp[0],
+                                iph->daddr, h.portp[1]);
+
+       /*
+        * Restore the fwmark
+        */
+       if (cp) {
+           skb->nfmark = cp->fwmark;
+           skb->nfcache |= NFC_ALTERED;
+           __ip_vs_conn_put(cp);
+       }
+
+       return NF_ACCEPT;
+}
+
+/*
  *     Check if it's for virtual services, look it up,
  *     and send it on its way...
  */
@@ -1093,7 +1136,7 @@
                 * Let the virtual server select a real server for the
                  * incomming connection, and create a connection entry.
                 */
-               cp = ip_vs_schedule(svc, iph);
+               cp = ip_vs_schedule(svc, iph, skb->nfmark);
                if (!cp)
                        return ip_vs_leave(svc, skb);
                 ip_vs_service_put(svc);
@@ -1153,7 +1196,13 @@
    applied to IPVS. */
 static struct nf_hook_ops ip_vs_in_ops = {
         { NULL, NULL },
-        ip_vs_in, PF_INET, NF_IP_LOCAL_IN, 100
+        ip_vs_in, PF_INET, NF_IP_LOCAL_IN, -10
+};
+
+/* Before filtering and routing, restore the fwmark on established sessions */
+static struct nf_hook_ops ip_vs_in2_ops = {
+       { NULL, NULL },
+       ip_vs_in2, PF_INET, NF_IP_PRE_ROUTING, -200
 };
 
 /* After packet filtering, change source only for VS/NAT */
@@ -1203,10 +1252,15 @@
                goto cleanup_conn;
        }
 
+       ret = nf_register_hook(&ip_vs_in2_ops);
+       if (ret < 0) {
+               IP_VS_ERR("can't register in2 hook.\n");
+               goto cleanup_app;
+       }
        ret = nf_register_hook(&ip_vs_in_ops);
        if (ret < 0) {
                IP_VS_ERR("can't register in hook.\n");
-               goto cleanup_app;
+               goto cleanup_in2ops;
        }
        ret = nf_register_hook(&ip_vs_out_ops);
        if (ret < 0) {
@@ -1233,6 +1287,8 @@
        nf_unregister_hook(&ip_vs_out_ops);
   cleanup_inops:
        nf_unregister_hook(&ip_vs_in_ops);
+  cleanup_in2ops:
+       nf_unregister_hook(&ip_vs_in2_ops);
   cleanup_app:
        ip_vs_app_cleanup();
   cleanup_conn:
@@ -1249,6 +1305,7 @@
        nf_unregister_hook(&ip_vs_post_routing_ops);
        nf_unregister_hook(&ip_vs_out_ops);
        nf_unregister_hook(&ip_vs_in_ops);
+       nf_unregister_hook(&ip_vs_in2_ops);
        ip_vs_app_cleanup();
        ip_vs_conn_cleanup();
        ip_vs_sltimer_cleanup();
diff -u -r ipvs-0.2.3/ipvs/ip_vs_ctl.c ipvs-0.2.3-mara/ipvs/ip_vs_ctl.c
--- ipvs-0.2.3/ipvs/ip_vs_ctl.c Mon Jan 29 16:29:29 2001
+++ ipvs-0.2.3-mara/ipvs/ip_vs_ctl.c    Mon Feb 12 20:17:53 2001
@@ -369,6 +369,7 @@
 
        read_lock(&__ip_vs_svc_lock);
 
+#if 0
        if (fwmark) {
                /*      
                 *      Check the table hashed by fwmark first
@@ -377,6 +378,7 @@
                 if (svc)
                         goto out;
        }
+#endif
         
         /*     
          *     Check the table hashed by <protocol,addr,port>
<Prev in Thread] Current Thread [Next in Thread>