LVS
lvs-devel
Google
 
Web LinuxVirtualServer.org

Re: [PATCHv2 ipvs-next 3/3] netfilter: ipvs: Refactor Maglev hashing sch

To: Julian Anastasov <ja@xxxxxx>
Subject: Re: [PATCHv2 ipvs-next 3/3] netfilter: ipvs: Refactor Maglev hashing scheduler
Cc: lvs-devel@xxxxxxxxxxxxxxx, Simon Horman <horms@xxxxxxxxxxxx>, Wensong Zhang <wensong@xxxxxxxxxxxx>
From: Inju Song <inju.song@xxxxxxxxxxxxx>
Date: Mon, 5 Mar 2018 15:30:54 +0900
        Hello,
        Thank you for reviewing the patches.

On Wed, Feb 28, 2018 at 01:34:26AM +0200, Julian Anastasov wrote:
> 
>       Hello,
> 
> On Sun, 25 Feb 2018, Inju Song wrote:
> 
> >   1) Change table size to primes array
> > 
> >   2) Add to check dest number > IP_VS_MH_TAB_SIZE
> > 
> >   3) Replace IP_VS_MH_LOOKUP_SIZE with IP_VS_MH_TAB_SIZE
> > 
> >   4) Replace large permutation array with dest_setup
> > 
> >   5) Add gcd, rshift and calculate ds->turns
> 
>       Makefile and Kconfig should be a last patch, otherwise
> on bisecting we will fail to compile.
>

        OK. I will move the Makefile and Kconfig patch to the end of the
series in the next v3 patchset. Should I then include the patch about
last_weight again?

> > Signed-off-by: Inju Song <inju.song@xxxxxxxxxxxxx>
> > ---
> >  net/netfilter/ipvs/ip_vs_mh.c | 291 
> > ++++++++++++++++++++++++++----------------
> >  1 file changed, 178 insertions(+), 113 deletions(-)
> > 
> > diff --git a/net/netfilter/ipvs/ip_vs_mh.c b/net/netfilter/ipvs/ip_vs_mh.c
> > index d1bf50a..c868778 100644
> > --- a/net/netfilter/ipvs/ip_vs_mh.c
> > +++ b/net/netfilter/ipvs/ip_vs_mh.c
> 
>       There must be license info at the beginning.
> 

        Maybe I will use the GPLv1 license. Should I write the license info
like the other sources (i.e. the beginning of ip_vs_sh.c)?

> > @@ -5,12 +5,13 @@
> >  #include <linux/slab.h>
> >  #include <linux/module.h>
> >  #include <linux/kernel.h>
> > -#include <linux/vmalloc.h>
> >  #include <linux/skbuff.h>
> >  
> >  #include <net/ip_vs.h>
> >  
> >  #include <linux/siphash.h>
> > +#include <linux/bitops.h>
> > +#include <linux/gcd.h>
> >  
> >  #define IP_VS_SVC_F_SCHED_MH_FALLBACK      IP_VS_SVC_F_SCHED1 /* MH 
> > fallback */
> >  #define IP_VS_SVC_F_SCHED_MH_PORT  IP_VS_SVC_F_SCHED2 /* MH use port */
> > @@ -19,23 +20,36 @@ struct ip_vs_mh_lookup {
> >     struct ip_vs_dest __rcu *dest;  /* real server (cache) */
> >  };
> >  
> > -/* for IPVS MH entry hash table */
> > -#ifndef CONFIG_IP_VS_MH_TAB_BITS
> > -#define CONFIG_IP_VS_MH_TAB_BITS        8
> > +struct ip_vs_mh_dest_setup {
> > +   int     offset; /* starting offset */
> > +   int     skip;   /* skip */
> > +   int     perm;   /* next_offset */
> > +   int     turns;  /* weight / gcd() */
> > +};
> > +
> > +/* Available prime numbers for MH table */
> > +static int primes[] = {251, 509, 1021, 2039, 4093,
> > +                  8191, 16381, 32749, 65521, 131071};
> > +
> > +/* For IPVS MH entry hash table */
> > +#ifndef CONFIG_IP_VS_MH_TAB_INDEX
> > +#define CONFIG_IP_VS_MH_TAB_INDEX  12
> >  #endif
> > -#define IP_VS_MH_TAB_BITS               CONFIG_IP_VS_MH_TAB_BITS
> > -#define IP_VS_MH_TAB_SIZE               BIT(IP_VS_MH_TAB_BITS)
> > -#define IP_VS_MH_LOOKUP_SIZE               65537 /* Must be prime number */
> > +#define IP_VS_MH_TAB_BITS          (CONFIG_IP_VS_MH_TAB_INDEX / 2)
> > +#define IP_VS_MH_TAB_INDEX         (CONFIG_IP_VS_MH_TAB_INDEX - 8)
> > +#define IP_VS_MH_TAB_SIZE               primes[IP_VS_MH_TAB_INDEX]
> >  
> >  struct ip_vs_mh_state {
> > -   struct rcu_head         rcu_head;
> > -   struct ip_vs_mh_lookup  lookup[IP_VS_MH_LOOKUP_SIZE];
> > -   unsigned int            **permutation;
> > -   hsiphash_key_t          hash1, hash2;
> > +   struct rcu_head                 rcu_head;
> > +   struct ip_vs_mh_lookup          *lookup;
> > +   struct ip_vs_mh_dest_setup      *dest_setup;
> > +   hsiphash_key_t                  hash1, hash2;
> > +   int                             gcd;
> > +   int                             rshift;
> >  };
> >  
> > -static inline void
> > -ip_vs_mh_generate_hash_secret(hsiphash_key_t *hash1, hsiphash_key_t *hash2)
> > +static inline void generate_hash_secret(hsiphash_key_t *hash1,
> > +                                   hsiphash_key_t *hash2)
> >  {
> >     hash1->key[0] = 2654435761UL;
> >     hash1->key[1] = 2654435761UL;
> > @@ -68,13 +82,13 @@ static inline bool is_unavailable(struct ip_vs_dest 
> > *dest)
> >     return hsiphash(&v, sizeof(v), key);
> >  }
> >  
> > -static inline int ip_vs_mh_permutate(struct ip_vs_mh_state *s,
> > -                                struct ip_vs_service *svc)
> > +static int ip_vs_mh_permutate(struct ip_vs_mh_state *s,
> > +                         struct ip_vs_service *svc)
> >  {
> > -   int i, j;
> >     struct list_head *p;
> > +   struct ip_vs_mh_dest_setup *ds;
> >     struct ip_vs_dest *dest;
> > -   unsigned int offset, skip;
> > +   int lw;
> >     bool empty;
> >  
> >     p = &svc->destinations;
> > @@ -82,56 +96,50 @@ static inline int ip_vs_mh_permutate(struct 
> > ip_vs_mh_state *s,
> >     if (empty)
> >             return 0;
> 
>       Above empty and list_empty can be removed, we can
> rely on the gcd < 1 check.
> 

        OK. I think the gcd < 1 check is enough for the empty
destinations case too.

> >  
> > -   /* extending permutation table to 2d arrays */
> > -   for (i = 1; i < svc->num_dests; i++)
> > -           s->permutation[i] = s->permutation[i - 1] +
> > -                               IP_VS_MH_LOOKUP_SIZE;
> > +   /* If gcd is smaller then 1, all last_weights of dest are zero.
> > +    * so skip permutation for the dests.
> > +    */
> > +   if (s->gcd < 1)
> > +           return 0;
> >  
> > -   i = 0;
> > +   /* Set dest_setup for the dests permutation */
> > +   ds = &s->dest_setup[0];
> >     while ((p = p->next) != &svc->destinations) {
> >             dest = list_entry(p, struct ip_vs_dest, n_list);
> > -           offset = ip_vs_mh_hashkey(svc->af, &dest->addr, dest->port,
> > -                                     &s->hash1, 0) % IP_VS_MH_LOOKUP_SIZE;
> > -           skip = ip_vs_mh_hashkey(svc->af, &dest->addr,
> > -                                   dest->port, &s->hash2, 0) %
> > -                                   (IP_VS_MH_LOOKUP_SIZE - 1) + 1;
> > -
> > -           for (j = 0; j < IP_VS_MH_LOOKUP_SIZE; j++) {
> > -                   s->permutation[i][j] = (offset + (j * skip)) %
> > -                                           IP_VS_MH_LOOKUP_SIZE;
> > -           }
> > -           i++;
> > +
> > +           ds->offset = ip_vs_mh_hashkey(svc->af, &dest->addr,
> > +                                         dest->port, &s->hash1, 0) %
> > +                                         IP_VS_MH_TAB_SIZE;
> > +           ds->skip = ip_vs_mh_hashkey(svc->af, &dest->addr,
> > +                                       dest->port, &s->hash2, 0) %
> > +                                       (IP_VS_MH_TAB_SIZE - 1) + 1;
> > +           ds->perm = ds->offset % IP_VS_MH_TAB_SIZE;
> 
>       offset is already < IP_VS_MH_TAB_SIZE, no need to mod.
> 

        It will be removed.

> > +
> > +           lw = atomic_read(&dest->last_weight);
> > +           ds->turns = (lw > 1) ? lw / s->gcd : lw;
> 
>       May be we can move the rshift to the calcs here?
> 
>       ds->turns = ((lw / s->gcd) >> s->rshift) ? : (lw != 0);
> 
>       or optimized in different way but we must be sure
> that rshift will not clear turns when lw > 0.
> 

        I think that is a very good optimization. I will move the
rshift into the turns calculation using this approach.

> > +           ds++;
> >     }
> >  
> >     return 0;
> >  }
> >  
> > -static inline int
> > -ip_vs_mh_populate(struct ip_vs_mh_state *s, struct ip_vs_service *svc)
> > +static int ip_vs_mh_populate(struct ip_vs_mh_state *s,
> > +                        struct ip_vs_service *svc)
> >  {
> > -   int i, ret, dw_count;
> > -   unsigned int *next;
> > -   unsigned int **pmt;
> > +   int i, n;
> >     struct ip_vs_mh_lookup *entry, *l;
> >     struct list_head *p;
> >     struct ip_vs_dest *dest;
> > -   unsigned int n, c;
> > +   struct ip_vs_mh_dest_setup *ds;
> > +   int c, rs, dt_count;
> >     bool empty;
> >  
> > -   ret = 0;
> > -   next = kcalloc(IP_VS_MH_TAB_SIZE, sizeof(unsigned int),
> > -                  GFP_KERNEL);
> > -   if (!next)
> > -           return -ENOMEM;
> > -
> > -   entry = kcalloc(IP_VS_MH_LOOKUP_SIZE, sizeof(*entry),
> > +   entry = kcalloc(IP_VS_MH_TAB_SIZE, sizeof(*entry),
> >                     GFP_KERNEL);
> 
>       We can avoid this allocation if s->gcd < 1 because
> entry is unused in this case.
> 
> > -   if (!entry) {
> > -           ret = -ENOMEM;
> > -           goto err_alloc;
> > -   }
> > +   if (!entry)
> > +           return -ENOMEM;
> >  
> > -   for (i = 0; i < IP_VS_MH_LOOKUP_SIZE; i++)
> > +   for (i = 0; i < IP_VS_MH_TAB_SIZE; i++)
> >             RCU_INIT_POINTER(entry[i].dest, NULL);
> >  
> >     p = &svc->destinations;
> > @@ -139,47 +147,61 @@ static inline int ip_vs_mh_permutate(struct 
> > ip_vs_mh_state *s,
> >     if (empty)
> >             goto out;
> 
>       We can avoid above list_empty and to use
> empty = (s->gcd < 1) instead. Or may be 'empty' var is not
> need at all if we use bitmap and populate dests one by one,
> see below...
> 
> >  
> > +   /* If gcd is smaller then 1, the weight is zero for all dests.
> > +    * so skip population for MH table and fills the table with NULLs.
> > +    */
> > +   if (s->gcd < 1)
> > +           goto out;
> 
>       if (s->gcd < 1)
>               goto reset;
> 

        Yes, and a reset label will be added for this case.

> > +
> >     n = 0;
> > -   dw_count = 0;
> > -   pmt = s->permutation;
> > -   while (n < IP_VS_MH_LOOKUP_SIZE) {
> > +   rs = s->rshift;
> > +   dt_count = 0;
> > +   while (n < IP_VS_MH_TAB_SIZE) {
> >             if (p == &svc->destinations)
> >                     p = p->next;
> >  
> > -           i = 0;
> > +           ds = &s->dest_setup[0];
> >             while (p != &svc->destinations) {
> > -                   c = pmt[i][next[i]];
> > +                   /* ignore added server with zero weight */
> > +                   if (ds->turns < 1) {
> > +                           p = p->next;
> > +                           ds++;
> > +                           continue;
> > +                   }
> >  
> > +                   c = ds->perm;
> >                     while (entry[c].dest) {
> > -                           next[i] = next[i] + 1;
> > -                           c = pmt[i][next[i]];
> > +                           /* Add skip, mod IP_VS_MH_TAB_SIZE */
> > +                           ds->perm += ds->skip;
> > +                           if (ds->perm >= IP_VS_MH_TAB_SIZE)
> > +                                   ds->perm -= IP_VS_MH_TAB_SIZE;
> > +                           c = ds->perm;
> >                     }
> >  
> >                     dest = list_entry(p, struct ip_vs_dest, n_list);
> >                     RCU_INIT_POINTER(entry[c].dest, dest);
> 
>       entry is not visible to RCU readers, so normal access
> is enough. It can be even a bitmap to reduce the memory usage
> 32 or 64 times, eg. we can use 'table' instead of 'entry':
> 
>       unsigned long *table;
>       table = kcalloc(BITS_TO_LONGS(IP_VS_MH_TAB_SIZE),
>                       sizeof(unsigned long));
> 
>       then 'while (entry[c].dest)' becomes 'while (test_bit(c, table))',
>       'entry[c].dest = dest;' becomes '__set_bit(c, table);'
> 
>       And we can directly populate the entry:
> 
>       dest = rcu_dereference_protected(s->lookup[c].dest, 1);
>       dest2 = list_entry(p, struct ip_vs_dest, n_list);
>       if (dest != dest2) {
>               if (dest)
>                       ip_vs_dest_put(dest);
>               ip_vs_dest_hold(dest2);
>               RCU_INIT_POINTER(s->lookup[c].dest, dest2);
>       }
> 

        OK. Replacing the arrays with a bitmap will be useful to reduce
memory usage, so it will be replaced in the next patches.

> >  
> > -                   next[i] = next[i] + 1;
> > -                   n++;
> > -                   if (n == IP_VS_MH_LOOKUP_SIZE)
> > -                           break;
> > +                   if (++n == IP_VS_MH_TAB_SIZE)
> > +                           goto out;
> >  
> > -                   if (++dw_count >= atomic_read(&dest->weight)) {
> > +                   if (++dt_count >= (ds->turns >> rs)) {
> 
>       turns can be already rshift-ed in ip_vs_mh_permutate.
> 
> > +                           dt_count = 0;
> >                             p = p->next;
> > -                           dw_count = 0;
> > -                           i++;
> > +                           ds++;
> >                     }
> >             }
> >     }
> 
>       Here we should jump to kfree(table):
> 
>       goto out;
> 
> >  
> >  out:
> 
> out: becomes reset:
> 
>       This loop will remain for the 's->gcd < 1' case only:
> 
> >     l = &s->lookup[0];
> > -   for (i = 0; i < IP_VS_MH_LOOKUP_SIZE; i++) {
> > -           dest = rcu_dereference_protected(entry[i].dest, 1);
> > +   for (i = 0; i < IP_VS_MH_TAB_SIZE; i++) {
> > +           dest = rcu_dereference_protected(l->dest, 1);
> >             if (dest)
> >                     ip_vs_dest_put(dest);
> >             if (empty) {
> >                     RCU_INIT_POINTER(l->dest, NULL);
> >             } else {
> 
>       This block will be removed:
> 
> > +                   dest = rcu_dereference_protected(entry[i].dest, 1);
> >                     ip_vs_dest_hold(dest);
> >                     RCU_INIT_POINTER(l->dest, dest);
> >  
> > @@ -193,9 +215,7 @@ static inline int ip_vs_mh_permutate(struct 
> > ip_vs_mh_state *s,
> >     }
> >  
> 
> out: label comes here to kfree bitmap 'table'.
> 
> >     kfree(entry);
> > -err_alloc:
> > -   kfree(next);
> > -   return ret;
> > +   return 0;
> >  }
> >  
> >  /* Get ip_vs_dest associated with supplied parameters. */
> > @@ -204,14 +224,13 @@ static inline int ip_vs_mh_permutate(struct 
> > ip_vs_mh_state *s,
> >          const union nf_inet_addr *addr, __be16 port)
> >  {
> >     unsigned int hash = ip_vs_mh_hashkey(svc->af, addr, port, &s->hash1, 0)
> > -                                        % IP_VS_MH_LOOKUP_SIZE;
> > +                                        % IP_VS_MH_TAB_SIZE;
> >     struct ip_vs_dest *dest = rcu_dereference(s->lookup[hash].dest);
> >  
> >     return (!dest || is_unavailable(dest)) ? NULL : dest;
> >  }
> >  
> > -/* As ip_vs_mh_get, but with fallback if selected server is unavailable
> > - */
> > +/* As ip_vs_mh_get, but with fallback if selected server is unavailable */
> >  static inline struct ip_vs_dest *
> >  ip_vs_mh_get_fallback(struct ip_vs_service *svc, struct ip_vs_mh_state *s,
> >                   const union nf_inet_addr *addr, __be16 port)
> > @@ -222,7 +241,7 @@ static inline int ip_vs_mh_permutate(struct 
> > ip_vs_mh_state *s,
> >  
> >     /* first try the dest it's supposed to go to */
> >     ihash = ip_vs_mh_hashkey(svc->af, addr, port,
> > -                            &s->hash1, 0) % IP_VS_MH_LOOKUP_SIZE;
> > +                            &s->hash1, 0) % IP_VS_MH_TAB_SIZE;
> >     dest = rcu_dereference(s->lookup[ihash].dest);
> >     if (!dest)
> >             return NULL;
> > @@ -235,10 +254,10 @@ static inline int ip_vs_mh_permutate(struct 
> > ip_vs_mh_state *s,
> >     /* if the original dest is unavailable, loop around the table
> >      * starting from ihash to find a new dest
> >      */
> > -   for (offset = 0; offset < IP_VS_MH_LOOKUP_SIZE; offset++) {
> > -           roffset = (offset + ihash) % IP_VS_MH_LOOKUP_SIZE;
> > +   for (offset = 0; offset < IP_VS_MH_TAB_SIZE; offset++) {
> > +           roffset = (offset + ihash) % IP_VS_MH_TAB_SIZE;
> >             hash = ip_vs_mh_hashkey(svc->af, addr, port, &s->hash1,
> > -                                   roffset) % IP_VS_MH_LOOKUP_SIZE;
> > +                                   roffset) % IP_VS_MH_TAB_SIZE;
> >             dest = rcu_dereference(s->lookup[hash].dest);
> >             if (!dest)
> >                     break;
> > @@ -261,7 +280,7 @@ static void ip_vs_mh_flush(struct ip_vs_mh_state *s)
> >     struct ip_vs_dest *dest;
> >  
> >     l = &s->lookup[0];
> > -   for (i = 0; i < IP_VS_MH_LOOKUP_SIZE; i++) {
> > +   for (i = 0; i < IP_VS_MH_TAB_SIZE; i++) {
> >             dest = rcu_dereference_protected(l->dest, 1);
> >             if (dest) {
> >                     ip_vs_dest_put(dest);
> > @@ -271,46 +290,85 @@ static void ip_vs_mh_flush(struct ip_vs_mh_state *s)
> >     }
> >  }
> >  
> > -/* Assign all the hash buckets of the specified table with the service.
> > - */
> > -static int
> > -ip_vs_mh_reassign(struct ip_vs_mh_state *s, struct ip_vs_service *svc)
> > +/* Assign all the hash buckets of the specified table with the service. */
> > +static int ip_vs_mh_reassign(struct ip_vs_mh_state *s,
> > +                        struct ip_vs_service *svc)
> >  {
> >     int ret;
> >  
> > -   /* ip_vs_mh_reassign is responsible for assigning
> > -    * and releasing s->permutation table
> > -    */
> > -   s->permutation = vzalloc(IP_VS_MH_TAB_SIZE * sizeof(unsigned int *));
> > -   if (!s->permutation)
> > -           return -ENOMEM;
> > +   if (svc->num_dests > IP_VS_MH_TAB_SIZE)
> > +           return -EINVAL;
> >  
> > -   s->permutation[0] = vzalloc(IP_VS_MH_TAB_SIZE * IP_VS_MH_LOOKUP_SIZE *
> > -                               sizeof(unsigned int));
> > -   if (!s->permutation[0]) {
> > -           ret = -ENOMEM;
> > -           goto err_alloc;
> > -   }
> > +   s->dest_setup = kcalloc(IP_VS_MH_TAB_SIZE,
> > +                           sizeof(struct ip_vs_mh_dest_setup),
> > +                           GFP_KERNEL);
> 
>       For s->dest_setup we need to allocate svc->num_dests entries, 
> IP_VS_MH_TAB_SIZE is too large. But if num_dests=0 we should avoid
> the allocation, then after kfree(s->dest_setup) we should use
> s->dest_setup = NULL to avoid double free.
> 

        Yep, it will be fixed.

> > +   if (!s->dest_setup)
> > +           return -ENOMEM;
> >  
> > -   ret = ip_vs_mh_permutate(s, svc);
> > -   if (ret < 0)
> > -           goto err_out;
> > +   ip_vs_mh_permutate(s, svc);
> >  
> >     ret = ip_vs_mh_populate(s, svc);
> >     if (ret < 0)
> > -           goto err_out;
> > +           goto out;
> >  
> >     IP_VS_DBG_BUF(6, "MH: reassign lookup table of %s:%d\n",
> >                   IP_VS_DBG_ADDR(svc->af, &svc->addr),
> >                   ntohs(svc->port));
> >  
> > -err_out:
> > -   vfree(s->permutation[0]);
> > -err_alloc:
> > -   vfree(s->permutation);
> > +out:
> > +   kfree(s->dest_setup);
> >     return ret;
> >  }
> >  
> > +static int ip_vs_mh_gcd_weight(struct ip_vs_service *svc)
> > +{
> > +   struct ip_vs_dest *dest;
> > +   int weight;
> > +   int g = 0;
> > +
> > +   list_for_each_entry(dest, &svc->destinations, n_list) {
> > +           weight = atomic_read(&dest->last_weight);
> > +           if (weight > 0) {
> > +                   if (g > 0)
> > +                           g = gcd(weight, g);
> > +                   else
> > +                           g = weight;
> > +           }
> > +   }
> > +   return g;
> > +}
> > +
> > +/* To avoid assigning huge weight for the MH table,
> > + * calculate shift value with gcd.
> > + */
> > +static int ip_vs_mh_shift_weight(struct ip_vs_service *svc, int gcd)
> > +{
> > +   struct ip_vs_dest *dest;
> > +   int new_weight, weight = 0;
> > +   int mw, shift;
> > +
> > +   /* If gcd is smaller then 1, the weight is zero for all dests.
> > +    * skip calculation rshift.
> > +    */
> > +   if (gcd < 1)
> > +           return 0;
> > +
> > +   list_for_each_entry(dest, &svc->destinations, n_list) {
> > +           new_weight = atomic_read(&dest->last_weight);
> > +           if (new_weight > weight)
> > +                   weight = new_weight;
> > +   }
> > +
> > +   /* Because gcd is greater than zero,
> > +    * the maximum weight and gcd are always greater than zero
> > +    */
> > +   mw = weight / gcd;
> > +
> > +   /* shift = occupied bits of weight/gcd - MH highest bits */
> > +   shift = fls(mw) - IP_VS_MH_TAB_BITS;
> > +   return (shift >= 0) ? shift : 0;
> > +}
> > +
> >  static int ip_vs_mh_init_svc(struct ip_vs_service *svc)
> >  {
> >     struct ip_vs_mh_state *s;
> > @@ -320,18 +378,24 @@ static int ip_vs_mh_init_svc(struct ip_vs_service 
> > *svc)
> >     if (!s)
> >             return -ENOMEM;
> >  
> > -   svc->sched_data = s;
> > +   s->lookup = kcalloc(IP_VS_MH_TAB_SIZE, sizeof(struct ip_vs_mh_lookup),
> > +                       GFP_KERNEL);
> > +   if (!s->lookup) {
> > +           kfree_rcu(s, rcu_head);
> 
>       kfree(s) is ok here.
> 
> > +           return -ENOMEM;
> > +   }
> > +
> > +   generate_hash_secret(&s->hash1, &s->hash2);
> > +   s->gcd = ip_vs_mh_gcd_weight(svc);
> > +   s->rshift = ip_vs_mh_shift_weight(svc, s->gcd);
> >  
> > +   svc->sched_data = s;
> >     IP_VS_DBG(6,
> >               "MH lookup table (memory=%zdbytes) allocated for current 
> > service\n",
> > -             sizeof(struct ip_vs_mh_lookup) * IP_VS_MH_LOOKUP_SIZE);
> > -
> > -   ip_vs_mh_generate_hash_secret(&s->hash1, &s->hash2);
> > +             sizeof(struct ip_vs_mh_lookup) * IP_VS_MH_TAB_SIZE);
> >  
> >     /* permutate & populate with current dests */
> > -   ip_vs_mh_reassign(s, svc);
> > -
> > -   return 0;
> > +   return ip_vs_mh_reassign(s, svc);
> >  }
> >  
> >  static void ip_vs_mh_done_svc(struct ip_vs_service *svc)
> > @@ -344,7 +408,7 @@ static void ip_vs_mh_done_svc(struct ip_vs_service *svc)
> >     /* release the table itself */
> >     kfree_rcu(s, rcu_head);
> 
>       s->lookup is not freed, we should free both
> s and s->lookup in new RCU callback. It will need a change in
> ip_vs_mh_cleanup: synchronize_rcu() -> rcu_barrier(), so that
> module unload to wait for the callback to complete.
> See ip_vs_lblcr.c how call_rcu() is called to schedule
> callback. We will need:
> 
> static void ip_vs_mh_state_free(struct rcu_head *head)
> {
>         struct ip_vs_mh_state *s;
> 
>       s = container_of(head, struct ip_vs_mh_state, rcu_head);
>       kfree(s->lookup);
>       kfree(s);
> }
> 

        I missed freeing s->lookup. Thanks for letting me know.
I will apply these review comments.

> >     IP_VS_DBG(6, "MH lookup table (memory=%zdbytes) released\n",
> > -             sizeof(struct ip_vs_mh_lookup) * IP_VS_MH_LOOKUP_SIZE);
> > +             sizeof(struct ip_vs_mh_lookup) * IP_VS_MH_TAB_SIZE);
> >  }
> >  
> >  static int ip_vs_mh_dest_changed(struct ip_vs_service *svc,
> > @@ -352,10 +416,11 @@ static int ip_vs_mh_dest_changed(struct ip_vs_service 
> > *svc,
> >  {
> >     struct ip_vs_mh_state *s = svc->sched_data;
> >  
> > -   /* assign the hash buckets with the updated service */
> > -   ip_vs_mh_reassign(s, svc);
> > +   s->gcd = ip_vs_mh_gcd_weight(svc);
> > +   s->rshift = ip_vs_mh_shift_weight(svc, s->gcd);
> >  
> > -   return 0;
> > +   /* assign the hash buckets with the updated service */
> > +   return ip_vs_mh_reassign(s, svc);
> >  }
> >  
> >  /* Helper function to get port number */
> > -- 
> > 1.8.3.1
> 
> Regards
> 
> --
> Julian Anastasov <ja@xxxxxx>
> 
> 

Regards

-- 
Inju Song
NAVER Corporation

--
To unsubscribe from this list: send the line "unsubscribe lvs-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

<Prev in Thread] Current Thread [Next in Thread>