LVS
lvs-users
Google
 
Web LinuxVirtualServer.org

Re: invoke scheduler for every received packet

To: "LinuxVirtualServer.org users mailing list." <lvs-users@xxxxxxxxxxxxxxxxxxxxxx>
Subject: Re: invoke scheduler for every received packet
Cc: lvs-users@xxxxxxxxxxxxxxxxxxxxxx
Cc: Wensong Zhang <wensong@xxxxxxxxxxxx>
Cc: Julian Anastasov <ja@xxxxxx>
From: Horms <horms@xxxxxxxxxxxx>
Date: Wed, 12 Jul 2006 13:45:05 +0900 (JST)
On Tue, 11 Jul 2006 09:41:54 +0200, Roberto Nibali wrote:
> Hi,
> 
>> this is the second time I am addressing the list with this question. I have
>> been using LVS with the ops patch, which enables invoking the scheduler module
>> for every received packet (no connection caching).
> 
> I have just briefly skimmed over this patch and am a bit unsure how
> efficient it is, but it seems to not populate the template cache.  Why
> invoking the scheduler module for every packet is special, I fail to
> see. Having non-persistent scheduling does the same. But I've only
> looked at the patch for 2 minutes.

Isn't the difference that persistence acts on connections,
whereas this patch acts on packets? UDP packets, of course;
TCP would make no sense.

>> This feature is
>> important to me because I am using a custom scheduler module which expects
>> to receive every packet belonging to the same port and IP address pair.
> 
> How does this look?
> 
>> Is it possible to achieve same behavior of LVS on some other way besides
>> using OPS (one packet scheduling) especially on newer kernels? Problem
>> with OPS is that it is not maintained for newer kernels.
> 
> Which newer kernels do you mean exactly? I would believe that
> 
> http://www.ssi.bg/~ja/
> 
> still applies to the latest 2.4.x kernel and that
> 
> http://archive.linuxvirtualserver.org/html/lvs-users/2005-09/msg00214.html
> 
> still applies to the latest 2.6.x kernel with some fuzz, of course. We 
> have actually never reviewed this patch, so I wonder if it would be time 
> to review it and submit for inclusion. Could you give me an exact 
> explanation what it does and how it's used?

I seem to recall needing a similar feature myself at some stage.
The usage that I had was related to SIP, where all packets seem to
come and go to and from the same ports and addresses, but can actually
be load-balanced independently.

I am quite happy to push this to DaveM for inclusion, after some review
of course. I will start with a review myself; the patch is taken from
the archive.linuxvirtualserver.org URL above.

-- 
Horms                                           
  H: http://www.vergenet.net/~horms/
  W: http://www.valinux.co.jp/en/

> diff -ruN linux-2.6.13.orig/include/net/ip_vs.h 
> linux-2.6.13/include/net/ip_vs.h
> --- linux-2.6.13.orig/include/net/ip_vs.h     2005-09-28 00:49:21.000000000 
> +0200
> +++ linux-2.6.13/include/net/ip_vs.h  2005-09-28 14:39:32.000000000 +0200
> @@ -19,6 +19,7 @@
>   */
>  #define IP_VS_SVC_F_PERSISTENT       0x0001          /* persistent port */
>  #define IP_VS_SVC_F_HASHED   0x0002          /* hashed entry */
> +#define IP_VS_SVC_F_ONEPACKET        0x0004          /* one-packet 
> scheduling */
>  
>  /*
>   *      Destination Server Flags
> @@ -84,6 +85,7 @@
>  #define IP_VS_CONN_F_IN_SEQ  0x0400          /* must do input seq adjust */
>  #define IP_VS_CONN_F_SEQ_MASK        0x0600          /* in/out sequence mask 
> */
>  #define IP_VS_CONN_F_NO_CPORT        0x0800          /* no client port set 
> yet */
> +#define IP_VS_CONN_F_ONE_PACKET      0x1000          /* forward only one 
> packet */
>  
>  /* Move it to better place one day, for now keep it unique */
>  #define NFC_IPVS_PROPERTY    0x10000
> diff -ruN linux-2.6.13.orig/net/ipv4/ipvs/ip_vs_conn.c 
> linux-2.6.13/net/ipv4/ipvs/ip_vs_conn.c
> --- linux-2.6.13.orig/net/ipv4/ipvs/ip_vs_conn.c      2005-09-28 
> 00:49:23.000000000 +0200
> +++ linux-2.6.13/net/ipv4/ipvs/ip_vs_conn.c   2005-09-28 02:54:55.000000000 
> +0200
> @@ -127,6 +127,9 @@
>       unsigned hash;
>       int ret;
>  
> +     if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
> +             return 0;
> +
>       /* Hash by protocol, client address and port */
>       hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport);
>  
> @@ -275,6 +278,11 @@
>   */
>  void ip_vs_conn_put(struct ip_vs_conn *cp)
>  {
> +     if (cp->flags & IP_VS_CONN_F_ONE_PACKET) {
> +             ip_vs_conn_expire_now(cp);
> +             return;
> +     }
> +
>       /* reset it expire in its timeout */
>       mod_timer(&cp->timer, jiffies+cp->timeout);
>  
> @@ -506,7 +514,7 @@
>       /*
>        *      unhash it if it is hashed in the conn table
>        */
> -     if (!ip_vs_conn_unhash(cp))
> +     if (!ip_vs_conn_unhash(cp) && !(cp->flags & IP_VS_CONN_F_ONE_PACKET))
>               goto expire_later;
>  
>       /*
> diff -ruN linux-2.6.13.orig/net/ipv4/ipvs/ip_vs_core.c 
> linux-2.6.13/net/ipv4/ipvs/ip_vs_core.c
> --- linux-2.6.13.orig/net/ipv4/ipvs/ip_vs_core.c      2005-09-28 
> 00:49:23.000000000 +0200
> +++ linux-2.6.13/net/ipv4/ipvs/ip_vs_core.c   2005-09-28 02:54:55.000000000 
> +0200
> @@ -215,6 +215,7 @@
>       struct ip_vs_dest *dest;
>       struct ip_vs_conn *ct;
>       __u16  dport;    /* destination port to forward */
> +     __u16 flags;
>       __u32  snet;     /* source network of the client, after masking */
>  
>       /* Mask saddr with the netmask to adjust template granularity */
> @@ -345,6 +346,9 @@
>               dport = ports[1];
>       }
>  
> +     flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
> +              && iph->protocol == IPPROTO_UDP)?
> +             IP_VS_CONN_F_ONE_PACKET : 0;

Could this be changed to an if/else construct instead of the ternary?

>       /*
>        *    Create a new connection according to the template
>        */
> @@ -352,7 +356,7 @@
>                           iph->saddr, ports[0],
>                           iph->daddr, ports[1],
>                           dest->addr, dport,
> -                         0,
> +                         flags,
>                           dest);
>       if (cp == NULL) {
>               ip_vs_conn_put(ct);
> @@ -383,6 +387,7 @@
>       struct iphdr *iph = skb->nh.iph;
>       struct ip_vs_dest *dest;
>       __u16 _ports[2], *pptr;
> +     __u16 flags;
>  
>       pptr = skb_header_pointer(skb, iph->ihl*4,
>                                 sizeof(_ports), _ports);
> @@ -412,6 +417,9 @@
>               return NULL;
>       }
>  
> +     flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
> +              && iph->protocol == IPPROTO_UDP)?
> +             IP_VS_CONN_F_ONE_PACKET : 0;

Again, if then, please.

>       /*
>        *    Create a connection entry.
>        */
> @@ -419,7 +427,7 @@
>                           iph->saddr, pptr[0],
>                           iph->daddr, pptr[1],
>                           dest->addr, dest->port?dest->port:pptr[1],
> -                         0,
> +                         flags,
>                           dest);
>       if (cp == NULL)
>               return NULL;
> @@ -462,6 +470,9 @@
>           && (inet_addr_type(iph->daddr) == RTN_UNICAST)) {
>               int ret, cs;
>               struct ip_vs_conn *cp;
> +             __u16 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
> +                             iph->protocol == IPPROTO_UDP)?
> +                             IP_VS_CONN_F_ONE_PACKET : 0;
>  
>               ip_vs_service_put(svc);
>  
> @@ -471,7 +482,7 @@
>                                   iph->saddr, pptr[0],
>                                   iph->daddr, pptr[1],
>                                   0, 0,
> -                                 IP_VS_CONN_F_BYPASS,
> +                                 IP_VS_CONN_F_BYPASS | flags,
>                                   NULL);
>               if (cp == NULL)
>                       return NF_DROP;
> diff -ruN linux-2.6.13.orig/net/ipv4/ipvs/ip_vs_ctl.c 
> linux-2.6.13/net/ipv4/ipvs/ip_vs_ctl.c
> --- linux-2.6.13.orig/net/ipv4/ipvs/ip_vs_ctl.c       2005-09-28 
> 00:49:23.000000000 +0200
> +++ linux-2.6.13/net/ipv4/ipvs/ip_vs_ctl.c    2005-09-28 14:49:20.000000000 
> +0200
> @@ -1753,14 +1753,18 @@
>               const struct ip_vs_dest *dest;
>  
>               if (iter->table == ip_vs_svc_table)
> -                     seq_printf(seq, "%s  %08X:%04X %s ",
> +                     seq_printf(seq, "%s  %08X:%04X %s%s ",
>                                  ip_vs_proto_name(svc->protocol),
>                                  ntohl(svc->addr),
>                                  ntohs(svc->port),
> -                                svc->scheduler->name);
> +                                svc->scheduler->name,
> +                                (svc->flags & IP_VS_SVC_F_ONEPACKET)?
> +                                " ops":"");
>               else
> -                     seq_printf(seq, "FWM  %08X %s ",
> -                                svc->fwmark, svc->scheduler->name);
> +                     seq_printf(seq, "FWM  %08X %s%s ",
> +                                svc->fwmark, svc->scheduler->name,
> +                                (svc->flags & IP_VS_SVC_F_ONEPACKET)?
> +                                " ops":"");

I'm not entirely comfortable with this proc change. Won't it break
user-space compatibility?

>               if (svc->flags & IP_VS_SVC_F_PERSISTENT)
>                       seq_printf(seq, "persistent %d %08X\n",

<Prev in Thread] Current Thread [Next in Thread>