Hi.
Here is a small patch to make LVS keep the MARK, and have return traffic
inherit the mark.
We use this for routing purposes on a multihomed LVS server, so that
return traffic is routed back out the same way the request was received.
We set the mark in the iptables mangle table
depending on the source interface, and in the routing table use this mark to
have return traffic routed back along the same path (in the opposite direction).
The patch also moves the priority of the LVS INPUT hook back in front of
the iptables filter hook, in order to be able to filter the traffic not picked
up by LVS but matching its service definitions. We are not
(yet) interested in filtering traffic to the virtual servers, but very
interested in filtering what traffic reaches the Linux LVS box itself.
Regards
Henrik Nordstrom
SafeCore Technologies
diff -u -r ipvs-0.2.3/ipvs/Makefile ipvs-0.2.3-mara/ipvs/Makefile
--- ipvs-0.2.3/ipvs/Makefile Mon Jan 15 14:43:46 2001
+++ ipvs-0.2.3-mara/ipvs/Makefile Mon Feb 12 20:18:43 2001
@@ -9,8 +9,8 @@
CC=gcc
CFLAGS= -D__KERNEL__ -DMODULE $(SMPFLAGS) $(DEBUGFLAGS) -O2 -Wall \
- -Wstrict-prototypes -I/usr/src/linux/include \
- -DMODVERSIONS -include /usr/src/linux/include/linux/modversions.h
+ -Wstrict-prototypes
-I/develop/maraimages/HEAD.scsi/build/linux-2.4.1-mara.4/include \
+ -DMODVERSIONS -include
/develop/maraimages/HEAD.scsi/build/linux-2.4.1-mara.4/include/linux/modversions.h
all: ip_vs.o ip_vs_lc.o ip_vs_rr.o ip_vs_wlc.o ip_vs_wrr.o \
ip_vs_lblc.o ip_vs_lblcr.o ip_vs_ftp.o
diff -u -r ipvs-0.2.3/ipvs/ip_vs.h ipvs-0.2.3-mara/ipvs/ip_vs.h
--- ipvs-0.2.3/ipvs/ip_vs.h Mon Jan 29 16:29:29 2001
+++ ipvs-0.2.3-mara/ipvs/ip_vs.h Mon Feb 12 20:17:53 2001
@@ -239,6 +239,7 @@
__u32 caddr; /* client address */
__u32 vaddr; /* virtual address */
__u32 daddr; /* destination address */
+ __u32 fwmark; /* fwmark of the session */
__u16 cport;
__u16 vport;
__u16 dport;
@@ -365,7 +366,8 @@
* (from ip_vs_core.c)
*/
extern struct ip_vs_conn * ip_vs_schedule(struct ip_vs_service *svc,
- struct iphdr *iph);
+ struct iphdr *iph,
+ unsigned int fwmark);
extern int ip_vs_proto_doff(unsigned proto, char *th, unsigned size);
extern const char *ip_vs_proto_name(unsigned proto);
extern unsigned int check_for_ip_vs_out(struct sk_buff **skb_p,
@@ -466,6 +468,8 @@
NIPQUAD(cp->caddr),ntohs(cp->cport),
NIPQUAD(ctl_cp->caddr),ntohs(ctl_cp->cport));
+ if (!cp->fwmark)
+ cp->fwmark = ctl_cp->fwmark;
cp->control = ctl_cp;
atomic_inc(&ctl_cp->n_control);
}
diff -u -r ipvs-0.2.3/ipvs/ip_vs_conn.c ipvs-0.2.3-mara/ipvs/ip_vs_conn.c
--- ipvs-0.2.3/ipvs/ip_vs_conn.c Fri Jan 26 11:19:22 2001
+++ ipvs-0.2.3-mara/ipvs/ip_vs_conn.c Mon Feb 12 20:17:53 2001
@@ -1341,13 +1341,14 @@
size = sprintf(buffer+len,
"%-3s %08X %04X %08X %04X "
- "%08X %04X %-11s %7lu\n",
+ "%08X %04X %-11s %7lu %04d\n",
ip_vs_proto_name(cp->protocol),
ntohl(cp->caddr), ntohs(cp->cport),
ntohl(cp->vaddr), ntohs(cp->vport),
ntohl(cp->daddr), ntohs(cp->dport),
ip_vs_state_name(cp->state),
- cp->timer.expires-jiffies);
+ cp->timer.expires-jiffies,
+ (int)cp->fwmark);
len += size;
pos += size;
if (pos <= offset)
diff -u -r ipvs-0.2.3/ipvs/ip_vs_core.c ipvs-0.2.3-mara/ipvs/ip_vs_core.c
--- ipvs-0.2.3/ipvs/ip_vs_core.c Mon Jan 29 16:29:29 2001
+++ ipvs-0.2.3-mara/ipvs/ip_vs_core.c Mon Feb 12 21:27:36 2001
@@ -363,7 +363,7 @@
* It selects a server according to the virtual service, and
* creates a connection entry.
*/
-struct ip_vs_conn *ip_vs_schedule(struct ip_vs_service *svc, struct iphdr *iph)
+struct ip_vs_conn *ip_vs_schedule(struct ip_vs_service *svc, struct iphdr
*iph, unsigned int fwmark)
{
struct ip_vs_conn *cp = NULL;
struct ip_vs_dest *dest;
@@ -405,6 +405,7 @@
IP_VS_ERR("Schedule: ip_vs_conn_new failed\n");
return NULL;
}
+ cp->fwmark = fwmark;
/*
* Bind the connection entry with the vs dest.
@@ -679,6 +680,7 @@
cp = ip_vs_conn_out_get(iph->protocol, iph->saddr, h.portp[0],
iph->daddr, h.portp[1]);
if (!cp) {
+#if 0
if (ip_vs_lookup_real_service(iph->protocol,
iph->saddr, h.portp[0])) {
/*
@@ -692,6 +694,7 @@
return NF_STOLEN;
}
}
+#endif
IP_VS_DBG(12, "packet for %s %d.%d.%d.%d:%d "
"continue traversal as normal.\n",
ip_vs_proto_name(iph->protocol),
@@ -999,6 +1002,46 @@
/*
+ * If the packet matches an established session, restore that
+ * session's saved fwmark onto the packet. Always returns NF_ACCEPT.
+ */
+static unsigned int ip_vs_in2(unsigned int hooknum,
+ struct sk_buff **skb_p,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct sk_buff *skb = *skb_p;
+ struct iphdr *iph = skb->nh.iph;
+ union ip_vs_tphdr h;
+ struct ip_vs_conn *cp;
+
+ h.raw = (char*) iph + iph->ihl * 4; /* skip ihl*4 bytes of IP header */
+
+ IP_VS_DBG(2, "Incoming2 %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d\n",
+ ip_vs_proto_name(iph->protocol),
+ NIPQUAD(iph->saddr), ntohs(h.portp[0]),
+ NIPQUAD(iph->daddr), ntohs(h.portp[1]));
+
+ /* Is there an established session?  NOTE(review): the ports are
+  * read without checking iph->protocol or the packet length --
+  * confirm this is safe for non-TCP/UDP packets and fragments. */
+ cp = ip_vs_conn_out_get(iph->protocol, iph->saddr, h.portp[0],
+ iph->daddr, h.portp[1]);
+
+ /*
+ * Copy the connection's stored fwmark onto the packet
+ */
+ if (cp) {
+ skb->nfmark = cp->fwmark;
+ skb->nfcache |= NFC_ALTERED; /* set the NFC_ALTERED bit */
+ __ip_vs_conn_put(cp); /* presumably drops the ref taken by the lookup -- confirm */
+ }
+
+ return NF_ACCEPT; /* the only return path: never drops or steals */
+}
+
+/*
* Check if it's for virtual services, look it up,
* and send it on its way...
*/
@@ -1093,7 +1136,7 @@
* Let the virtual server select a real server for the
* incomming connection, and create a connection entry.
*/
- cp = ip_vs_schedule(svc, iph);
+ cp = ip_vs_schedule(svc, iph, skb->nfmark);
if (!cp)
return ip_vs_leave(svc, skb);
ip_vs_service_put(svc);
@@ -1153,7 +1196,13 @@
applied to IPVS. */
static struct nf_hook_ops ip_vs_in_ops = {
{ NULL, NULL },
- ip_vs_in, PF_INET, NF_IP_LOCAL_IN, 100
+ ip_vs_in, PF_INET, NF_IP_LOCAL_IN, -10
+};
+
+/* Before filtering and routing, restore the fwmark on established sessions */
+static struct nf_hook_ops ip_vs_in2_ops = {
+ { NULL, NULL },
+ ip_vs_in2, PF_INET, NF_IP_PRE_ROUTING, -200
};
/* After packet filtering, change source only for VS/NAT */
@@ -1203,10 +1252,15 @@
goto cleanup_conn;
}
+ ret = nf_register_hook(&ip_vs_in2_ops);
+ if (ret < 0) {
+ IP_VS_ERR("can't register in2 hook.\n");
+ goto cleanup_app;
+ }
ret = nf_register_hook(&ip_vs_in_ops);
if (ret < 0) {
IP_VS_ERR("can't register in hook.\n");
- goto cleanup_app;
+ goto cleanup_in2ops;
}
ret = nf_register_hook(&ip_vs_out_ops);
if (ret < 0) {
@@ -1233,6 +1287,8 @@
nf_unregister_hook(&ip_vs_out_ops);
cleanup_inops:
nf_unregister_hook(&ip_vs_in_ops);
+ cleanup_in2ops:
+ nf_unregister_hook(&ip_vs_in2_ops);
cleanup_app:
ip_vs_app_cleanup();
cleanup_conn:
@@ -1249,6 +1305,7 @@
nf_unregister_hook(&ip_vs_post_routing_ops);
nf_unregister_hook(&ip_vs_out_ops);
nf_unregister_hook(&ip_vs_in_ops);
+ nf_unregister_hook(&ip_vs_in2_ops);
ip_vs_app_cleanup();
ip_vs_conn_cleanup();
ip_vs_sltimer_cleanup();
diff -u -r ipvs-0.2.3/ipvs/ip_vs_ctl.c ipvs-0.2.3-mara/ipvs/ip_vs_ctl.c
--- ipvs-0.2.3/ipvs/ip_vs_ctl.c Mon Jan 29 16:29:29 2001
+++ ipvs-0.2.3-mara/ipvs/ip_vs_ctl.c Mon Feb 12 20:17:53 2001
@@ -369,6 +369,7 @@
read_lock(&__ip_vs_svc_lock);
+#if 0
if (fwmark) {
/*
* Check the table hashed by fwmark first
@@ -377,6 +378,7 @@
if (svc)
goto out;
}
+#endif
/*
* Check the table hashed by <protocol,addr,port>
|