On Tue, 11 Jul 2006 09:41:54 +0200, Roberto Nibali wrote:
> Hi,
>
>> this is a second time I'am addressing list with this question. I have
>> been using LVS with ops patch, which enables invoking scheduler module
>> for every received packet (no connection caching).
>
> I have just briefly skimmed over this patch and am a bit unsure how
> efficient it is, but it seems to not populate the template cache. Why
> invoking the scheduler module for every packet is special, I fail to
> see. Having non-persistent scheduling to does the same. But I've only
> looked at the patch for 2 minutes.
Isn't the difference that persistence acts on connections,
whereas this patch acts on packets? Of course, only UDP ones;
TCP would make no sense.
>> This feature is
>> important to me because I'am using custom scheduler module which expects
>> to receive every packet belonging to same port and IP address pair.
>
> How does this look?
>
>> Is it possible to achieve same behavior of LVS on some other way besides
>> using OPS (one packet scheduling) especially on newer kernels? Problem
>> with OPS is that it is not maintained for newer kernels.
>
> Which newer kernels do you mean exactly? I would believe that
>
> http://www.ssi.bg/~ja/
>
> still applies to the latest 2.4.x kernel and that
>
> http://archive.linuxvirtualserver.org/html/lvs-users/2005-09/msg00214.html
>
> still applies to the latest 2.6.x kernel with some fuzz, of course. We
> have actually never reviewed this patch, so I wonder if it would be time
> to review it and submit for inclusion. Could you give me an exact
> explanation what it does and how it's used?
I seem to recall needing a similar feature myself at some stage.
The usage that I had was related to SIP, where all packets seem to
come and go to and from the same ports and addresses, but can actually
be load-balanced independently.
I am quite happy to push this to DaveM for inclusion. After some review
of course. I will start with a review myself, the patch is taken from
the archive.linuxvirtualserver.org URL above.
--
Horms
H: http://www.vergenet.net/~horms/
W: http://www.valinux.co.jp/en/
> diff -ruN linux-2.6.13.orig/include/net/ip_vs.h
> linux-2.6.13/include/net/ip_vs.h
> --- linux-2.6.13.orig/include/net/ip_vs.h 2005-09-28 00:49:21.000000000
> +0200
> +++ linux-2.6.13/include/net/ip_vs.h 2005-09-28 14:39:32.000000000 +0200
> @@ -19,6 +19,7 @@
> */
> #define IP_VS_SVC_F_PERSISTENT 0x0001 /* persistent port */
> #define IP_VS_SVC_F_HASHED 0x0002 /* hashed entry */
> +#define IP_VS_SVC_F_ONEPACKET 0x0004 /* one-packet
> scheduling */
>
> /*
> * Destination Server Flags
> @@ -84,6 +85,7 @@
> #define IP_VS_CONN_F_IN_SEQ 0x0400 /* must do input seq adjust */
> #define IP_VS_CONN_F_SEQ_MASK 0x0600 /* in/out sequence mask
> */
> #define IP_VS_CONN_F_NO_CPORT 0x0800 /* no client port set
> yet */
> +#define IP_VS_CONN_F_ONE_PACKET 0x1000 /* forward only one
> packet */
>
> /* Move it to better place one day, for now keep it unique */
> #define NFC_IPVS_PROPERTY 0x10000
> diff -ruN linux-2.6.13.orig/net/ipv4/ipvs/ip_vs_conn.c
> linux-2.6.13/net/ipv4/ipvs/ip_vs_conn.c
> --- linux-2.6.13.orig/net/ipv4/ipvs/ip_vs_conn.c 2005-09-28
> 00:49:23.000000000 +0200
> +++ linux-2.6.13/net/ipv4/ipvs/ip_vs_conn.c 2005-09-28 02:54:55.000000000
> +0200
> @@ -127,6 +127,9 @@
> unsigned hash;
> int ret;
>
> + if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
> + return 0;
> +
> /* Hash by protocol, client address and port */
> hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport);
>
> @@ -275,6 +278,11 @@
> */
> void ip_vs_conn_put(struct ip_vs_conn *cp)
> {
> + if (cp->flags & IP_VS_CONN_F_ONE_PACKET) {
> + ip_vs_conn_expire_now(cp);
> + return;
> + }
> +
> /* reset it expire in its timeout */
> mod_timer(&cp->timer, jiffies+cp->timeout);
>
> @@ -506,7 +514,7 @@
> /*
> * unhash it if it is hashed in the conn table
> */
> - if (!ip_vs_conn_unhash(cp))
> + if (!ip_vs_conn_unhash(cp) && !(cp->flags & IP_VS_CONN_F_ONE_PACKET))
> goto expire_later;
>
> /*
> diff -ruN linux-2.6.13.orig/net/ipv4/ipvs/ip_vs_core.c
> linux-2.6.13/net/ipv4/ipvs/ip_vs_core.c
> --- linux-2.6.13.orig/net/ipv4/ipvs/ip_vs_core.c 2005-09-28
> 00:49:23.000000000 +0200
> +++ linux-2.6.13/net/ipv4/ipvs/ip_vs_core.c 2005-09-28 02:54:55.000000000
> +0200
> @@ -215,6 +215,7 @@
> struct ip_vs_dest *dest;
> struct ip_vs_conn *ct;
> __u16 dport; /* destination port to forward */
> + __u16 flags;
> __u32 snet; /* source network of the client, after masking */
>
> /* Mask saddr with the netmask to adjust template granularity */
> @@ -345,6 +346,9 @@
> dport = ports[1];
> }
>
> + flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
> + && iph->protocol == IPPROTO_UDP)?
> + IP_VS_CONN_F_ONE_PACKET : 0;
Could this be changed to an if then construct?
> /*
> * Create a new connection according to the template
> */
> @@ -352,7 +356,7 @@
> iph->saddr, ports[0],
> iph->daddr, ports[1],
> dest->addr, dport,
> - 0,
> + flags,
> dest);
> if (cp == NULL) {
> ip_vs_conn_put(ct);
> @@ -383,6 +387,7 @@
> struct iphdr *iph = skb->nh.iph;
> struct ip_vs_dest *dest;
> __u16 _ports[2], *pptr;
> + __u16 flags;
>
> pptr = skb_header_pointer(skb, iph->ihl*4,
> sizeof(_ports), _ports);
> @@ -412,6 +417,9 @@
> return NULL;
> }
>
> + flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
> + && iph->protocol == IPPROTO_UDP)?
> + IP_VS_CONN_F_ONE_PACKET : 0;
Again, if then, please.
> /*
> * Create a connection entry.
> */
> @@ -419,7 +427,7 @@
> iph->saddr, pptr[0],
> iph->daddr, pptr[1],
> dest->addr, dest->port?dest->port:pptr[1],
> - 0,
> + flags,
> dest);
> if (cp == NULL)
> return NULL;
> @@ -462,6 +470,9 @@
> && (inet_addr_type(iph->daddr) == RTN_UNICAST)) {
> int ret, cs;
> struct ip_vs_conn *cp;
> + __u16 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
> + iph->protocol == IPPROTO_UDP)?
> + IP_VS_CONN_F_ONE_PACKET : 0;
>
> ip_vs_service_put(svc);
>
> @@ -471,7 +482,7 @@
> iph->saddr, pptr[0],
> iph->daddr, pptr[1],
> 0, 0,
> - IP_VS_CONN_F_BYPASS,
> + IP_VS_CONN_F_BYPASS | flags,
> NULL);
> if (cp == NULL)
> return NF_DROP;
> diff -ruN linux-2.6.13.orig/net/ipv4/ipvs/ip_vs_ctl.c
> linux-2.6.13/net/ipv4/ipvs/ip_vs_ctl.c
> --- linux-2.6.13.orig/net/ipv4/ipvs/ip_vs_ctl.c 2005-09-28
> 00:49:23.000000000 +0200
> +++ linux-2.6.13/net/ipv4/ipvs/ip_vs_ctl.c 2005-09-28 14:49:20.000000000
> +0200
> @@ -1753,14 +1753,18 @@
> const struct ip_vs_dest *dest;
>
> if (iter->table == ip_vs_svc_table)
> - seq_printf(seq, "%s %08X:%04X %s ",
> + seq_printf(seq, "%s %08X:%04X %s%s ",
> ip_vs_proto_name(svc->protocol),
> ntohl(svc->addr),
> ntohs(svc->port),
> - svc->scheduler->name);
> + svc->scheduler->name,
> + (svc->flags & IP_VS_SVC_F_ONEPACKET)?
> + " ops":"");
> else
> - seq_printf(seq, "FWM %08X %s ",
> - svc->fwmark, svc->scheduler->name);
> + seq_printf(seq, "FWM %08X %s%s ",
> + svc->fwmark, svc->scheduler->name,
> + (svc->flags & IP_VS_SVC_F_ONEPACKET)?
> + " ops":"");
I'm not entirely comfortable with this proc change. Won't it break
user-space compatibility?
> if (svc->flags & IP_VS_SVC_F_PERSISTENT)
> seq_printf(seq, "persistent %d %08X\n",
|